PyPI - ORForise - Versions diffs - 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl - Mend

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (30)

ORForise/Aggregate_Compare.py +318 -133
ORForise/Annotation_Compare.py +243 -125
ORForise/Comparator.py +600 -552
ORForise/ORForise_Analysis/genome_Metrics.py +51 -33
ORForise/Tools/Augustus/Augustus.py +30 -23
ORForise/Tools/Balrog/Balrog.py +31 -23
ORForise/Tools/EasyGene/EasyGene.py +30 -22
ORForise/Tools/FGENESB/FGENESB.py +32 -25
ORForise/Tools/FragGeneScan/FragGeneScan.py +29 -22
ORForise/Tools/GFF/GFF.py +51 -47
ORForise/Tools/GLIMMER_3/GLIMMER_3.py +34 -27
ORForise/Tools/GeneMark/GeneMark.py +46 -40
ORForise/Tools/GeneMark_HA/GeneMark_HA.py +29 -22
ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +29 -22
ORForise/Tools/GeneMark_S/GeneMark_S.py +29 -22
ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +29 -25
ORForise/Tools/MetaGene/MetaGene.py +29 -22
ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +30 -23
ORForise/Tools/MetaGeneMark/MetaGeneMark.py +30 -23
ORForise/Tools/Prodigal/Prodigal.py +30 -26
ORForise/Tools/Prokka/Prokka.py +30 -25
ORForise/Tools/StORF_Reporter/StORF_Reporter.py +33 -26
ORForise/Tools/TransDecoder/TransDecoder.py +29 -22
ORForise/utils.py +204 -2
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/METADATA +5 -5
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/RECORD +30 -30
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/entry_points.txt +5 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/WHEEL +0 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/licenses/LICENSE +0 -0
{orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/top_level.txt +0 -0

ORForise/Tools/GeneMark/GeneMark.py CHANGED Viewed

@@ -7,48 +7,54 @@ except ImportError:
     from ORForise.utils import revCompIterative
     from ORForise.utils import sortORFs
-def GeneMark(tool_pred, genome):
+def GeneMark(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     geneMark_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    prev_Start = 0
-    prev_Stop = 0
-    started = False
-    with open(tool_pred, 'r') as GeneMark_input:
-        for line in GeneMark_input:
-            line = line.split()
-            if len(line) == 7:
-                started = True
-                if 'direct' in line[2] or 'complement' in line[
-                    2]:  # Strange Output requires strange code - We select the Longest ORF from each set
-                    start = int(line[0])
-                    stop = int(line[1])
-                    strand = line[2]
-                    if 'complement' in strand:  # Reverse Compliment starts and stops adjusted
-                        if start != prev_Start:
-                            r_start = genome_size - stop
-                            r_stop = genome_size - start
-                            strand = '-'
-                            startCodon = genome_rev[r_start:r_start + 3]
-                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                            po = str(start) + ',' + str(stop)
-                            orf = [strand, startCodon, stopCodon, 'CDS']
-                            geneMark_ORFs.update({po: orf})
-                    elif 'direct' in strand:
-                        if stop != prev_Stop:
-                            startCodon = genome[start - 1:start + 2]
-                            stopCodon = genome[stop - 3:stop]
-                            strand = '+'
-                            po = str(start) + ',' + str(stop)
-                            orf = [strand, startCodon, stopCodon, 'CDS']
-                            geneMark_ORFs.update({po: orf})
-                    prev_Start = start
-                    prev_Stop = stop
-            elif len(line) == 0 and started == True:
-                prev_Stop = 0
-                prev_Start = 0
+    for dna_region in dna_regions:
+        geneMark_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        prev_Start = 0
+        prev_Stop = 0
+        started = False
+        with open(tool_pred, 'r') as GeneMark_input:
+            for line in GeneMark_input:
+                line = line.split()
+                if len(line) == 7:
+                    started = True
+                    if 'direct' in line[2] or 'complement' in line[2] and dna_region in line[0]:  # Strange Output requires strange code - We select the Longest ORF from each set
+                        start = int(line[0])
+                        stop = int(line[1])
+                        strand = line[2]
+                        if 'complement' in strand:  # Reverse Compliment starts and stops adjusted
+                            if start != prev_Start:
+                                r_start = genome_size - stop
+                                r_stop = genome_size - start
+                                strand = '-'
+                                startCodon = genome_rev[r_start:r_start + 3]
+                                stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                                po = str(start) + ',' + str(stop)
+                                orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark']
+                                geneMark_ORFs.update({po: orf})
+                        elif 'direct' in strand:
+                            if stop != prev_Stop:
+                                startCodon = genome[start - 1:start + 2]
+                                stopCodon = genome[stop - 3:stop]
+                                strand = '+'
+                                po = str(start) + ',' + str(stop)
+                                orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark']
+                                geneMark_ORFs.update({po: orf})
+                        prev_Start = start
+                        prev_Stop = stop
+                elif len(line) == 0 and started == True:
+                    prev_Stop = 0
+                    prev_Start = 0
-    geneMark_ORFs = sortORFs(geneMark_ORFs)
+    for group in geneMark_ORFs:
+        geneMark_ORFs[group] = sortORFs(geneMark_ORFs[group])
     return geneMark_ORFs
 ############# This section can be used to select the ORF with highest probability score.

ORForise/Tools/GeneMark_HA/GeneMark_HA.py CHANGED Viewed

@@ -8,28 +8,35 @@ except ImportError:
     from ORForise.utils import sortORFs
-def GeneMark_HA(tool_pred, genome):
+def GeneMark_HA(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     geneMark_HA_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as GeneMark_HA_input:
-        for line in GeneMark_HA_input:
-            line = line.split()
-            if len(line) >= 9 and "CDS" in line[5]:
-                start = int(line[6])
-                stop = int(line[7])
-                strand = line[9]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                geneMark_HA_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        geneMark_HA_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as GeneMark_HA_input:
+            for line in GeneMark_HA_input:
+                line = line.split()
+                if len(line) >= 9 and "CDS" in line[5] and dna_region in line[0]:
+                    start = int(line[6])
+                    stop = int(line[7])
+                    strand = line[9]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HA']
+                    geneMark_HA_ORFs.update({po: orf})
-    geneMark_HA_ORFs = sortORFs(geneMark_HA_ORFs)
+    for group in geneMark_HA_ORFs:
+        geneMark_HA_ORFs[group] = sortORFs(geneMark_HA_ORFs[group])
     return geneMark_HA_ORFs

ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py CHANGED Viewed

@@ -9,28 +9,35 @@ except ImportError:
-def GeneMark_HMM(tool_pred, genome):
+def GeneMark_HMM(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     geneMark_HMM_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as GeneMark_HMM_input:
-        for line in GeneMark_HMM_input:
-            line = line.split('\t')
-            if len(line) >= 9 and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                geneMark_HMM_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        geneMark_HMM_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as GeneMark_HMM_input:
+            for line in GeneMark_HMM_input:
+                line = line.split('\t')
+                if len(line) >= 9 and "CDS" in line[2] and dna_region in line[0]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HMM']
+                    geneMark_HMM_ORFs.update({po: orf})
-    geneMark_HMM_ORFs = sortORFs(geneMark_HMM_ORFs)
+    for group in geneMark_HMM_ORFs:
+        geneMark_HMM_ORFs[group] = sortORFs(geneMark_HMM_ORFs[group])
     return geneMark_HMM_ORFs

ORForise/Tools/GeneMark_S/GeneMark_S.py CHANGED Viewed

@@ -8,28 +8,35 @@ except ImportError:
     from ORForise.utils import sortORFs
-def GeneMark_S(tool_pred, genome):
+def GeneMark_S(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     geneMark_S_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as prodigal_input:
-        for line in prodigal_input:
-            line = line.split()
-            if len(line) >= 9 and "CDS" in line[5]:
-                start = int(line[6])
-                stop = int(line[7])
-                strand = line[9]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                geneMark_S_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        geneMark_S_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as prodigal_input:
+            for line in prodigal_input:
+                line = line.split()
+                if len(line) >= 9 and "CDS" in line[5] and dna_region in line[0]:
+                    start = int(line[6])
+                    stop = int(line[7])
+                    strand = line[9]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S']
+                    geneMark_S_ORFs.update({po: orf})
-    geneMark_S_ORFs = sortORFs(geneMark_S_ORFs)
+    for group in geneMark_S_ORFs:
+        geneMark_S_ORFs[group] = sortORFs(geneMark_S_ORFs[group])
     return geneMark_S_ORFs

ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py CHANGED Viewed

@@ -10,30 +10,34 @@ except ImportError:
 def GeneMark_S_2(*args):
     tool_pred = args[0]
-    genome = args[1]
-    types = args[2]
-    geneMark_S_2_ORFs = collections.defaultdict(list)
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as GeneMark_S_2_input:
-        for line in GeneMark_S_2_input:
-            line = line.split('\t')
-            if len(line) >= 9 and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                info = line[8]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2|'+info]
-                geneMark_S_2_ORFs.update({po: orf})
+    dna_regions = args[1]
+    geneMark_S_2_ORFs = collections.defaultdict()
+    for dna_region in dna_regions:
+        geneMark_S_2_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as GeneMark_S_2_input:
+            for line in GeneMark_S_2_input:
+                line = line.split('\t')
+                if len(line) >= 9 and dna_region in line[0] and "CDS" in line[2]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    info = line[8]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2']
+                    geneMark_S_2_ORFs[dna_region].update({po: orf})
-    geneMark_S_2_ORFs = sortORFs(geneMark_S_2_ORFs)
+    for group in geneMark_S_2_ORFs:
+        geneMark_S_2_ORFs[group] = sortORFs(geneMark_S_2_ORFs[group])
     return geneMark_S_2_ORFs

ORForise/Tools/MetaGene/MetaGene.py CHANGED Viewed

@@ -8,28 +8,35 @@ except ImportError:
     from ORForise.utils import sortORFs
-def MetaGene(tool_pred, genome):
+def MetaGene(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     metaGene_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as MetaGene_input:
-        for line in MetaGene_input:
-            line = line.split()
-            if len(line) >= 6 and ("-" in line or '+' in line):
-                start = int(line[0])
-                stop = int(line[1])
-                strand = line[2]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS']
-                metaGene_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        metaGene_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as MetaGene_input:
+            for line in MetaGene_input:
+                line = line.split()
+                if len(line) >= 6 and ("-" in line or '+' in line) and dna_region in line[0]:
+                    start = int(line[0])
+                    stop = int(line[1])
+                    strand = line[2]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGene']
+                    metaGene_ORFs.update({po: orf})
-    metaGene_ORFs = sortORFs(metaGene_ORFs)
+    for group in metaGene_ORFs:
+        metaGene_ORFs[group] = sortORFs(metaGene_ORFs[group])
     return metaGene_ORFs

ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py CHANGED Viewed

@@ -8,29 +8,36 @@ except ImportError:
     from ORForise.utils import sortORFs
-def MetaGeneAnnotator(tool_pred, genome):
+def MetaGeneAnnotator(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     metaGeneAnnotator_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as MetaGeneAnnotator_input:
-        for line in MetaGeneAnnotator_input:
-            line = line.split()
-            if len(line) == 11:
-                if "gene_" in line[0]:
-                    start = int(line[1])
-                    stop = int(line[2])
-                    strand = line[3]
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, 'CDS']
-                    metaGeneAnnotator_ORFs.update({po: orf})
+    for dna_region in dna_regions:
+        metaGeneAnnotator_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as MetaGeneAnnotator_input:
+            for line in MetaGeneAnnotator_input:
+                line = line.split()
+                if len(line) == 11 and dna_region in line[0]:
+                    if "gene_" in line[0]:
+                        start = int(line[1])
+                        stop = int(line[2])
+                        strand = line[3]
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneAnnotator']
+                        metaGeneAnnotator_ORFs.update({po: orf})
-    metaGeneAnnotator_ORFs = sortORFs(metaGeneAnnotator_ORFs)
+    for group in metaGeneAnnotator_ORFs:
+        metaGeneAnnotator_ORFs[group] = sortORFs(metaGeneAnnotator_ORFs[group])
     return metaGeneAnnotator_ORFs

ORForise/Tools/MetaGeneMark/MetaGeneMark.py CHANGED Viewed

@@ -8,29 +8,36 @@ except ImportError:
     from ORForise.utils import sortORFs
-def MetaGeneMark(tool_pred, genome):
+def MetaGeneMark(*args):
+    tool_pred = args[0]
+    dna_regions = args[1]
     metaGeneMarkORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as metaGeneMark_input:
-        for line in metaGeneMark_input:
-            line = line.split()
-            if len(line) == 19:
-                if 'GeneMark.hmm' in line[4] and "CDS" in line[5]:
-                    start = int(line[6])
-                    stop = int(line[7])
-                    strand = line[9]
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, 'CDS']
-                    metaGeneMarkORFs.update({po: orf})
+    for dna_region in dna_regions:
+        metaGeneMarkORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as metaGeneMark_input:
+            for line in metaGeneMark_input:
+                line = line.split()
+                if len(line) == 19:
+                    if 'GeneMark.hmm' in line[4] and "CDS" in line[5] and dna_region in line[0]:
+                        start = int(line[6])
+                        stop = int(line[7])
+                        strand = line[9]
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
+                        po = str(start) + ',' + str(stop)
+                        orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneMark']
+                        metaGeneMarkORFs.update({po: orf})
-    metaGeneMarkORFs = sortORFs(metaGeneMarkORFs)
+    for group in metaGeneMarkORFs:
+        metaGeneMarkORFs[group] = sortORFs(metaGeneMarkORFs[group])
     return metaGeneMarkORFs

ORForise/Tools/Prodigal/Prodigal.py CHANGED Viewed

@@ -10,30 +10,34 @@ except ImportError:
 def Prodigal(*args):
     tool_pred = args[0]
-    genome = args[1]
-    #types = args[2]
-    prodigalORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as prodigal_input:
-        for line in prodigal_input:
-            line = line.split()
-            if "Prodigal" in line[1] and "CDS" in line[2]:
-                start = int(line[3])
-                stop = int(line[4])
-                strand = line[6]
-                info = line[8]
-                if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                    r_start = genome_size - stop
-                    r_stop = genome_size - start
-                    startCodon = genome_rev[r_start:r_start + 3]
-                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                elif '+' in strand:
-                    startCodon = genome[start - 1:start + 2]
-                    stopCodon = genome[stop - 3:stop]
-                po = str(start) + ',' + str(stop)
-                orf = [strand, startCodon, stopCodon, 'CDS', 'Prodigal|'+info]
-                prodigalORFs.update({po: orf})
+    dna_regions = args[1]
+    prodigal_ORFs = collections.OrderedDict()
+    for dna_region in dna_regions:
+        prodigal_ORFs[dna_region] = collections.OrderedDict()
+    for dna_region in dna_regions:
+        genome = dna_regions[dna_region][0]
+        genome_size = len(genome)
+        genome_rev = revCompIterative(genome)
+        with open(tool_pred, 'r') as prodigal_input:
+            for line in prodigal_input:
+                line = line.split()
+                if "Prodigal" in line[1] and dna_region in line[0] and "CDS" in line[2]:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    info = line[8]
+                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                        r_start = genome_size - stop
+                        r_stop = genome_size - start
+                        startCodon = genome_rev[r_start:r_start + 3]
+                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                    elif '+' in strand:
+                        startCodon = genome[start - 1:start + 2]
+                        stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, 'CDS', 'Prodigal']
+                    prodigal_ORFs[dna_region].update({po: orf})
-    prodigalORFs = sortORFs(prodigalORFs)
-    return prodigalORFs
+    for group in prodigal_ORFs:
+        prodigal_ORFs[group] = sortORFs(prodigal_ORFs[group])
+    return prodigal_ORFs

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl