PyPI - ORForise - Versions diffs - 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

ORForise/Aggregate_Compare.py +318 -133
ORForise/Annotation_Compare.py +243 -125
ORForise/Comparator.py +600 -552
ORForise/ORForise_Analysis/genome_Metrics.py +51 -33
ORForise/Tools/Augustus/Augustus.py +30 -23
ORForise/Tools/Balrog/Balrog.py +31 -23
ORForise/Tools/EasyGene/EasyGene.py +30 -22
ORForise/Tools/FGENESB/FGENESB.py +32 -25
ORForise/Tools/FragGeneScan/FragGeneScan.py +29 -22
ORForise/Tools/GFF/GFF.py +51 -47
ORForise/Tools/GLIMMER_3/GLIMMER_3.py +34 -27
ORForise/Tools/GeneMark/GeneMark.py +46 -40
ORForise/Tools/GeneMark_HA/GeneMark_HA.py +29 -22
ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +29 -22
ORForise/Tools/GeneMark_S/GeneMark_S.py +29 -22
ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +29 -25
ORForise/Tools/MetaGene/MetaGene.py +29 -22
ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +30 -23
ORForise/Tools/MetaGeneMark/MetaGeneMark.py +30 -23
ORForise/Tools/Prodigal/Prodigal.py +30 -26
ORForise/Tools/Prokka/Prokka.py +30 -25
ORForise/Tools/StORF_Reporter/StORF_Reporter.py +33 -26
ORForise/Tools/TransDecoder/TransDecoder.py +29 -22
ORForise/utils.py +204 -2
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/METADATA +5 -5
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/RECORD +30 -30
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/entry_points.txt +5 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/WHEEL +0 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/licenses/LICENSE +0 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/top_level.txt +0 -0

ORForise/ORForise_Analysis/genome_Metrics.py CHANGED Viewed

@@ -1,12 +1,13 @@
 import argparse
-import csv
 import numpy as np
+import os
+try:
+    from ORForise.src.ORForise.utils import *  # local file
+except ImportError:
+    from ORForise.utils import *
-from ORForise.src.ORForise.utils import *  # local file
-parser = argparse.ArgumentParser()
-parser.add_argument('-g', '--genome_to_compare', default='', help='Which genome to analyse?')
-args = parser.parse_args()
 def start_Codon_Count(start_Codons):
@@ -39,7 +40,6 @@ def stop_Codon_Count(stop_Codons):
     tag, taa, tga, other = 0, 0, 0, 0
     other_Stops = []
     for stop in stop_Codons:
-        stop
         if stop == 'TAG':
             tag += 1
         elif stop == 'TAA':
@@ -83,14 +83,19 @@ def revCompIterative(watson):
     watson = watson.upper()
     watsonrev = watson[::-1]
     crick = ""
     for nt in watsonrev:
         crick += complements[nt]
     return crick
-def genome_Metrics(genome_to_compare):
+def genome_Metrics(fasta_in, gff_in, output_file):
+    base_name = os.path.basename(fasta_in)  # Gets file name with extension
+    genome_name = os.path.splitext(base_name)[0]  # Removes extension
     genome_Seq = ""
-    with open('../Genomes/' + genome_to_compare + '.fa', 'r') as genome:
+    with open(fasta_in , 'r') as genome:
         for line in genome:
             line = line.replace("\n", "")
             if not line.startswith('>'):
@@ -100,16 +105,16 @@ def genome_Metrics(genome_to_compare):
     genome_Rev = revCompIterative(genome_Seq)
     genome_Size = len(genome_Seq)
-    coding_Regions = np.zeros((genome_Size), dtype=np.int)
-    non_Coding_Regions = np.zeros((genome_Size), dtype=np.int)
-    all_gene_Regions = np.zeros((genome_Size), dtype=np.int)
+    coding_Regions = np.zeros((genome_Size), dtype=int)
+    non_Coding_Regions = np.zeros((genome_Size), dtype=int)
+    all_gene_Regions = np.zeros((genome_Size), dtype=int)
     protein_coding_genes = collections.OrderedDict()
     non_protein_coding_genes = collections.OrderedDict()
     strands = collections.defaultdict(int)
     lengths_PCG, gene_Pos_Olap, gene_Neg_Olap, short_PCGs, pcg_GC = [], [], [], [], []
     prev_Gene_Stop, count, nc_Count, pos_Strand, neg_Strand = 0, 0, 0, 0, 0
     prev_Gene_Overlapped = False
-    with open('../Genomes/' + genome_to_compare + '.gff', 'r') as genome_gff:
+    with open(gff_in, 'r') as genome_gff:
         for line in genome_gff:
             line = line.split('\t')
             try:
@@ -200,7 +205,7 @@ def genome_Metrics(genome_to_compare):
     atg_P, gtg_P, ttg_P, att_P, ctg_P, other_Start_P, other_Starts = start_Codon_Count(start_Codons)
     tag_P, taa_P, tga_P, other_Stop_P, other_Stops = stop_Codon_Count(stop_Codons)
-    output = ("Number of Protein Coding Genes in " + genome_to_compare + " : " + str(
+    output = ("Number of Protein Coding Genes in " + genome_name + " : " + str(
         len(lengths_PCG)) + " ,Median Length of PCGs: " + str(median_PCG) + ", Min Length of PCGs: " + str(
         min(lengths_PCG)) + ", Max Length of PCGs: " + str(max(lengths_PCG)) +
               ", Number of PCGs on Pos Strand: " + str(strands['+']) + ", Number of PCGs on Neg Strand: " + str(
@@ -210,31 +215,44 @@ def genome_Metrics(genome_to_compare):
               ", Longest PCG Overlap: " + str(longest_Olap) + ", Median PCG Overlap: " + str(
                 median_PCG_Olap) + ", Number of PCGs less than 100 amino acids: " + str(len(short_PCGs)) +
-              '\nPercentage of Genome which is Protein Coding: ' + format(coding_Percentage,
-                                                                          '.2f') + ', Number of Non-PCGs: ' + str(
-                len(non_protein_coding_genes)) + ', Percentage of Genome Non-PCG: ' + format(non_coding_Percentage,
+              "\nPercentage of Genome which is Protein Coding: " + format(coding_Percentage,
+                                                                          '.2f') + ", Number of Non-PCGs: " + str(
+                len(non_protein_coding_genes)) + ", Percentage of Genome Non-PCG: " + format(non_coding_Percentage,
                                                                                              '.2f') +
-              ', Percentage of All Genes in Genome: ' + format(all_gene_Percentage, '.2f') +
+              ", Percentage of All Genes in Genome: " + format(all_gene_Percentage, '.2f') +
+              "\nPercentage of Genes starting with ATG: " + atg_P +
+              "\nPercentage of Genes starting with GTG: " + gtg_P +
+              "\nPercentage of Genes starting with TTG: " + ttg_P +
+              "\nPercentage of Genes starting with ATT: " + att_P +
+              "\nPercentage of Genes starting with CTG: " + ctg_P +
+              "\nPercentage of Genes starting with Alternative Start Codon: " + other_Start_P +
+              "\nPercentage of Genes ending with TAG: " + tag_P +
+              "\nPercentage of Genes ending with TAA: " + taa_P +
+              "\nPercentage of Genes ending with TGA: " + tga_P +
+              "\nPercentage of Genes ending with Alternative Stop Codon: " + other_Stop_P)
+    with open(output_file, 'w') as out_file:
+        out_file.write('Genome Metrics:\n')
+        out_file.write(output + '\n')
+    #print(output)
-              '\nPercentage of Genes starting with ATG: ' + atg_P +
-              '\nPercentage of Genes starting with GTG: ' + gtg_P +
-              '\nPercentage of Genes starting with TTG: ' + ttg_P +
-              '\nPercentage of Genes starting with ATT: ' + att_P +
-              '\nPercentage of Genes starting with CTG: ' + ctg_P +
-              '\nPercentage of Genes starting with Alternative Start Codon: ' + other_Start_P +
+def main():
+    parser = argparse.ArgumentParser(description="...")
+    parser.add_argument("-f", dest='fasta_in', required=True, help="Input FASTA file")
+    parser.add_argument("-g", dest='gff_in', required=True, help="Corresponding GFF file to FASTA")
+    parser.add_argument("-o", dest='output_file', required=True, help="Output metrics file")
-              '\nPercentage of Genes ending with TAG: ' + tag_P +
-              '\nPercentage of Genes ending with TAA: ' + taa_P +
-              '\nPercentage of Genes ending with TGA: ' + tga_P +
-              '\nPercentage of Genes ending with Alternative Stop Codon: ' + other_Stop_P)
+    options = parser.parse_args()
-    with open('../Genomes/' + genome_to_compare + '_metrics.csv', 'w') as out_file:
-        out = csv.writer(out_file, delimiter=',')
-        out.writerow(['Genome Metrics:'])
-        out.writerow([output])
+    genome_Metrics(options.fasta_in, options.gff_in, options.output_file)
-    print(output)
 if __name__ == "__main__":
-    genome_Metrics(**vars(args))
+   main()

ORForise/Tools/Augustus/Augustus.py CHANGED Viewed

@@ -8,28 +8,35 @@ except ImportError:
     from ORForise.utils import sortORFs
-def Augustus(tool_pred, genome):
+def Augustus(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     augustus_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as Augustus_input:
-        for line in Augustus_input:
-            line = line.split()
-            if len(line) == 12 and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                augustus_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        augustus_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as Augustus_input:
+            for line in Augustus_input:
+                line = line.split()
+                if len(line) == 12 and dna_region in line[0] and "CDS" in line[2]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'Augustus']
+                    augustus_ORFs.update({po: orf})
-    augustus_ORFs = sortORFs(augustus_ORFs)
-    return augustus_ORFs
+        for group in augustus_ORFs:
+            augustus_ORFs[group] = sortORFs(augustus_ORFs[group])
+        return augustus_ORFs

ORForise/Tools/Balrog/Balrog.py CHANGED Viewed

@@ -8,29 +8,37 @@ except ImportError:
     from ORForise.utils import sortORFs
-def Balrog(tool_pred, genome):
+def Balrog(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     Balrog_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as Balrog_input:
-        for line in Balrog_input:
-            if '#' not in line:
-                line = line.split('\t')
-                if "CDS" in line[2]:
-                    start = int(line[3])
-                    stop = int(line[4])
-                    strand = line[6]
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, 'CDS']
-                    Balrog_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        Balrog_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
-    Balrog_ORFs = sortORFs(Balrog_ORFs)
+        with open(tool_pred, 'r') as Balrog_input:
+            for line in Balrog_input:
+                if '#' not in line:
+                    line = line.split('\t')
+                    if "CDS" in line[2] and dna_region in line[0]:
+                        start = int(line[3])
+                        stop = int(line[4])
+                        strand = line[6]
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, 'CDS', 'Balrog']
+                        Balrog_ORFs.update({po: orf})
+    for group in Balrog_ORFs:
+        Balrog_ORFs[group] = sortORFs(Balrog_ORFs[group])
     return Balrog_ORFs

ORForise/Tools/EasyGene/EasyGene.py CHANGED Viewed

@@ -8,28 +8,36 @@ except ImportError:
     from ORForise.utils import sortORFs
-def EasyGene(tool_pred, genome):
+def EasyGene(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     easyGene_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as EasyGene_input:
-        for line in EasyGene_input:
-            line = line.split()
-            if len(line) == 10 and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                easyGene_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        easyGene_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as EasyGene_input:
+            for line in EasyGene_input:
+                line = line.split()
+                if len(line) == 10 and dna_region in line[0] and "CDS" in line[2]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    info = line[8]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'EasyGene']
+                    easyGene_ORFs[dna_region].update({po: orf})
-    easyGene_ORFs = sortORFs(easyGene_ORFs)
+    for group in easyGene_ORFs:
+        easyGene_ORFs[group] = sortORFs(easyGene_ORFs[group])
     return easyGene_ORFs

ORForise/Tools/FGENESB/FGENESB.py CHANGED Viewed

@@ -8,31 +8,38 @@ except ImportError:
     from ORForise.utils import sortORFs
-def FGENESB(tool_pred, genome):
+def FGENESB(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     FGENESB_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as FGENESB_input:
-        for line in FGENESB_input:
-            if '>GENE' in line:
-                line = line.split()
-                if '2208' in line:
-                    print("ss")
-                if len(line) == 10 and ">GENE" in line[0]:
-                    start = int(line[2])
-                    stop = int(line[4])
-                    strand = line[9]
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, 'CDS']
-                    FGENESB_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        FGENESB_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as FGENESB_input:
+            for line in FGENESB_input:
+                if '>GENE' in line:
+                    line = line.split()
+                    if '2208' in line:
+                        print("ss")
+                    if len(line) == 10 and dna_region in line[0] and ">GENE" in line[0]:
+                        start = int(line[2])
+                        stop = int(line[4])
+                        strand = line[9]
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, 'CDS', 'FGENESB']
+                        FGENESB_ORFs.update({po: orf})
-    FGENESB_ORFs = sortORFs(FGENESB_ORFs)
+    for group in FGENESB_ORFs:
+        FGENESB_ORFs[group] = sortORFs(FGENESB_ORFs[group])
     return FGENESB_ORFs

ORForise/Tools/FragGeneScan/FragGeneScan.py CHANGED Viewed

@@ -8,28 +8,35 @@ except ImportError:
     from ORForise.utils import sortORFs
-def FragGeneScan(tool_pred, genome):
+def FragGeneScan(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     fragGeneScan_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as fragGeneScan_input:
-        for line in fragGeneScan_input:
-            line = line.split()
-            if len(line) == 10 and "FGS" in line[1] and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                fragGeneScan_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        fragGeneScan_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as fragGeneScan_input:
+            for line in fragGeneScan_input:
+                line = line.split()
+                if len(line) == 10 and "FGS" in line[1] and "CDS" in line[2] and dna_region in line[0]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'FragGeneScan']
+                    fragGeneScan_ORFs.update({po: orf})
-    fragGeneScan_ORFs = sortORFs(fragGeneScan_ORFs)
+    for group in fragGeneScan_ORFs:
+        fragGeneScan_ORFs[group] = sortORFs(fragGeneScan_ORFs[group])
     return fragGeneScan_ORFs

ORForise/Tools/GFF/GFF.py CHANGED Viewed

@@ -10,53 +10,57 @@ except ImportError:
 def GFF(*args):
     tool_pred = args[0]
-    genome = args[1]
-    #types = args[2]
+    dna_regions = args[1]
     GFF_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as gff_input:
-        for line in gff_input:
-            if '#' not in line:
-                line = line.split('\t')
-                #gene_types = types.split(',') - Temporary fix
-                #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9:  # line[2] for normalrun
-                if 'CDS' in line[2] and len(line) == 9:
-                    start = int(line[3])
-                    stop = int(line[4])
-                    strand = line[6]
-                    info = line[8]
-                    if stop >= genome_size:
-                        extra_stop = stop - genome_size
-                        corrected_stop = genome_size
-                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                            r_start = genome_size - corrected_stop
-                            r_stop = genome_size - start
-                            seq = genome_rev[r_start:r_stop + 1]
-                            extra_seq = genome_rev[-extra_stop - 1:]
-                            seq = extra_seq+seq
-                            startCodon = seq[:3]
-                            stopCodon = seq[-3:]
-                        elif '+' in strand:
-                            seq = genome[start -1 :corrected_stop]
-                            extra_seq = genome[:extra_stop +1]
-                            seq = seq+extra_seq
-                            startCodon = seq[:3]
-                            stopCodon = seq[-3:]
-                    else:
-                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                            r_start = genome_size - stop
-                            r_stop = genome_size - start
-                            startCodon = genome_rev[r_start:r_start + 3]
-                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                        elif '+' in strand:
-                            startCodon = genome[start - 1:start + 2]
-                            stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
-                    GFF_ORFs.update({po: orf})
-                # elif "CDS" in line[2]:
-                #     sys.exit("SAS")
+    for dna_region in dna_regions:
+        GFF_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as gff_input:
+            for line in gff_input:
+                if '#' not in line:
+                    line = line.split('\t')
+                    #gene_types = types.split(',') - Temporary fix
+                    #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9:  # line[2] for normalrun
+                    if 'CDS' in line[2] and len(line) == 9 and dna_region in line[0]:
+                        start = int(line[3])
+                        stop = int(line[4])
+                        strand = line[6]
+                        info = line[8]
+                        if stop >= genome_size:
+                            extra_stop = stop - genome_size
+                            corrected_stop = genome_size
+                            if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                                r_start = genome_size - corrected_stop
+                                r_stop = genome_size - start
+                                seq = genome_rev[r_start:r_stop + 1]
+                                extra_seq = genome_rev[-extra_stop - 1:]
+                                seq = extra_seq+seq
+                                startCodon = seq[:3]
+                                stopCodon = seq[-3:]
+                            elif '+' in strand:
+                                seq = genome[start -1 :corrected_stop]
+                                extra_seq = genome[:extra_stop +1]
+                                seq = seq+extra_seq
+                                startCodon = seq[:3]
+                                stopCodon = seq[-3:]
+                        else:
+                            if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                                r_start = genome_size - stop
+                                r_stop = genome_size - start
+                                startCodon = genome_rev[r_start:r_start + 3]
+                                stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                            elif '+' in strand:
+                                startCodon = genome[start - 1:start + 2]
+                                stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, line[2], 'GFF-Standard'] # This needs to detect the type
+                        GFF_ORFs.update({po: orf})
+                    # elif "CDS" in line[2]:
+                    #     sys.exit("SAS")
-    GFF_ORFs = sortORFs(GFF_ORFs)
+    for group in GFF_ORFs:
+        GFF_ORFs[group] = sortORFs(GFF_ORFs[group])
     return GFF_ORFs

ORForise/Tools/GLIMMER_3/GLIMMER_3.py CHANGED Viewed

@@ -8,33 +8,40 @@ except ImportError:
     from ORForise.utils import sortORFs
-def GLIMMER_3(tool_pred, genome):
+def GLIMMER_3(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     GLIMMER_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred,
-              'r') as glimmer_input:  # GLIMMER_3 reverses the start and stop positions for ORFS on the negative strand
-        for line in glimmer_input:
-            if '>' not in line:  # This will not work with multiple contigs
-                line = line.split()
-                if len(line) == 5 and "orf" in line[0]:
-                    if '-' in line[3]:  # Reverse Compliment starts and stops adjusted -  Switched to match Sense Strand
-                        start = int(line[2])
-                        stop = int(line[1])
-                        strand = '-'
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in line[3]:
-                        start = int(line[1])
-                        stop = int(line[2])
-                        strand = '+'
-                        startCodon = genome[start - 1:start + 3]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, 'CDS']
-                    GLIMMER_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        GLIMMER_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred,
+                  'r') as glimmer_input:  # GLIMMER_3 reverses the start and stop positions for ORFS on the negative strand
+            for line in glimmer_input:
+                if '>' not in line:  # This will not work with multiple contigs
+                    line = line.split()
+                    if len(line) == 5 and "orf" in line[0] and dna_region in line[0]:
+                        if '-' in line[3]:  # Reverse Compliment starts and stops adjusted -  Switched to match Sense Strand
+                            start = int(line[2])
+                            stop = int(line[1])
+                            strand = '-'
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in line[3]:
+                            start = int(line[1])
+                            stop = int(line[2])
+                            strand = '+'
+                            startCodon = genome[start - 1:start + 3]
+                            stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, 'CDS', 'GLIMMER_3']
+                        GLIMMER_ORFs.update({po: orf})
-    GLIMMER_ORFs = sortORFs(GLIMMER_ORFs)
+    for group in GLIMMER_ORFs:
+        GLIMMER_ORFs[group] = sortORFs(GLIMMER_ORFs[group])
     return GLIMMER_ORFs

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl