ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +318 -133
- ORForise/Annotation_Compare.py +243 -125
- ORForise/Comparator.py +600 -552
- ORForise/ORForise_Analysis/genome_Metrics.py +51 -33
- ORForise/Tools/Augustus/Augustus.py +30 -23
- ORForise/Tools/Balrog/Balrog.py +31 -23
- ORForise/Tools/EasyGene/EasyGene.py +30 -22
- ORForise/Tools/FGENESB/FGENESB.py +32 -25
- ORForise/Tools/FragGeneScan/FragGeneScan.py +29 -22
- ORForise/Tools/GFF/GFF.py +51 -47
- ORForise/Tools/GLIMMER_3/GLIMMER_3.py +34 -27
- ORForise/Tools/GeneMark/GeneMark.py +46 -40
- ORForise/Tools/GeneMark_HA/GeneMark_HA.py +29 -22
- ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +29 -22
- ORForise/Tools/GeneMark_S/GeneMark_S.py +29 -22
- ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +29 -25
- ORForise/Tools/MetaGene/MetaGene.py +29 -22
- ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +30 -23
- ORForise/Tools/MetaGeneMark/MetaGeneMark.py +30 -23
- ORForise/Tools/Prodigal/Prodigal.py +30 -26
- ORForise/Tools/Prokka/Prokka.py +30 -25
- ORForise/Tools/StORF_Reporter/StORF_Reporter.py +33 -26
- ORForise/Tools/TransDecoder/TransDecoder.py +29 -22
- ORForise/utils.py +204 -2
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/METADATA +5 -5
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/RECORD +30 -30
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/entry_points.txt +5 -0
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/WHEEL +0 -0
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/licenses/LICENSE +0 -0
- {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/top_level.txt +0 -0
|
@@ -7,48 +7,54 @@ except ImportError:
|
|
|
7
7
|
from ORForise.utils import revCompIterative
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
|
-
def GeneMark(*args):
    """Parse a GeneMark prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the GeneMark prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'GeneMark']``.

    Only rows with exactly seven whitespace-separated fields whose third
    field contains ``direct`` or ``complement`` are used. Coordinates are
    treated as 1-based, inclusive, forward-strand positions.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    geneMark_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        # Predictions are stored per region so that the sortORFs pass at the
        # end only ever receives ORF dictionaries.
        region_ORFs = geneMark_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        prev_Start = 0
        prev_Stop = 0
        started = False
        with open(tool_pred, 'r') as GeneMark_input:
            for line in GeneMark_input:
                line = line.split()
                if len(line) == 7:
                    started = True
                    strand = line[2]
                    # NOTE(review): the first field of these rows is the
                    # integer start coordinate, so it cannot be matched
                    # against a region identifier; rows are therefore
                    # accepted for every region. Confirm how multi-record
                    # GeneMark output should be attributed to regions.
                    if 'direct' not in strand and 'complement' not in strand:
                        continue
                    start = int(line[0])
                    stop = int(line[1])
                    # Strange output requires strange code: within a set of
                    # alternative predictions only the first row with a new
                    # anchoring coordinate is kept.
                    if 'complement' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        if start != prev_Start:
                            r_start = genome_size - stop
                            r_stop = genome_size - start
                            startCodon = genome_rev[r_start:r_start + 3]
                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                            po = str(start) + ',' + str(stop)
                            region_ORFs[po] = ['-', startCodon, stopCodon, 'CDS', 'GeneMark']
                    else:
                        if stop != prev_Stop:
                            startCodon = genome[start - 1:start + 2]
                            stopCodon = genome[stop - 3:stop]
                            po = str(start) + ',' + str(stop)
                            region_ORFs[po] = ['+', startCodon, stopCodon, 'CDS', 'GeneMark']
                    prev_Start = start
                    prev_Stop = stop
                elif not line and started:
                    # A blank line after the first data row ends the current
                    # set, so the duplicate tracking starts afresh.
                    prev_Stop = 0
                    prev_Start = 0

    for group in geneMark_ORFs:
        geneMark_ORFs[group] = sortORFs(geneMark_ORFs[group])
    return geneMark_ORFs
|
|
53
59
|
|
|
54
60
|
############# This section can be used to select the ORF with highest probability score.
|
|
@@ -8,28 +8,35 @@ except ImportError:
|
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def GeneMark_HA(*args):
    """Parse a GeneMark_HA prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'GeneMark_HA']``.

    Rows are split on whitespace; a row is used when its sixth field
    contains ``CDS`` and its first field contains the region identifier.
    Rows whose strand field contains neither ``+`` nor ``-`` are skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    geneMark_HA_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = geneMark_HA_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as GeneMark_HA_input:
            for line in GeneMark_HA_input:
                line = line.split()
                # Field index 9 is read below, so ten fields are the minimum.
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if len(line) >= 10 and "CDS" in line[5] and dna_region in line[0]:
                    start = int(line[6])
                    stop = int(line[7])
                    strand = line[9]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HA']

    for group in geneMark_HA_ORFs:
        geneMark_HA_ORFs[group] = sortORFs(geneMark_HA_ORFs[group])
    return geneMark_HA_ORFs
|
|
@@ -9,28 +9,35 @@ except ImportError:
|
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
|
|
12
|
-
def GeneMark_HMM(*args):
    """Parse a GeneMark_HMM prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'GeneMark_HMM']``.

    Rows are split on tabs; a row is used when it has at least nine fields,
    its third field contains ``CDS`` and its first field contains the region
    identifier. Rows whose strand field contains neither ``+`` nor ``-`` are
    skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    geneMark_HMM_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = geneMark_HMM_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as GeneMark_HMM_input:
            for line in GeneMark_HMM_input:
                line = line.split('\t')
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if len(line) >= 9 and "CDS" in line[2] and dna_region in line[0]:
                    start = int(line[3])
                    stop = int(line[4])
                    strand = line[6]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HMM']

    for group in geneMark_HMM_ORFs:
        geneMark_HMM_ORFs[group] = sortORFs(geneMark_HMM_ORFs[group])
    return geneMark_HMM_ORFs
|
|
@@ -8,28 +8,35 @@ except ImportError:
|
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def GeneMark_S(*args):
    """Parse a GeneMark_S prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'GeneMark_S']``.

    Rows are split on whitespace; a row is used when its sixth field
    contains ``CDS`` and its first field contains the region identifier.
    Rows whose strand field contains neither ``+`` nor ``-`` are skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    geneMark_S_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = geneMark_S_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as GeneMark_S_input:
            for line in GeneMark_S_input:
                line = line.split()
                # Field index 9 is read below, so ten fields are the minimum.
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if len(line) >= 10 and "CDS" in line[5] and dna_region in line[0]:
                    start = int(line[6])
                    stop = int(line[7])
                    strand = line[9]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S']

    for group in geneMark_S_ORFs:
        geneMark_S_ORFs[group] = sortORFs(geneMark_S_ORFs[group])
    return geneMark_S_ORFs
|
|
@@ -10,30 +10,34 @@ except ImportError:
|
|
|
10
10
|
|
|
11
11
|
def GeneMark_S_2(*args):
    """Parse a GeneMark_S_2 prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2']``.

    Rows are split on tabs; a row is used when it has at least nine fields,
    its first field contains the region identifier and its third field
    contains ``CDS``. Rows whose strand field contains neither ``+`` nor
    ``-`` are skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    # An OrderedDict matches the container used by the other tool parsers; a
    # defaultdict created without a factory adds nothing over a plain dict.
    geneMark_S_2_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = geneMark_S_2_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as GeneMark_S_2_input:
            for line in GeneMark_S_2_input:
                line = line.split('\t')
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if len(line) >= 9 and dna_region in line[0] and "CDS" in line[2]:
                    start = int(line[3])
                    stop = int(line[4])
                    strand = line[6]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2']

    for group in geneMark_S_2_ORFs:
        geneMark_S_2_ORFs[group] = sortORFs(geneMark_S_2_ORFs[group])
    return geneMark_S_2_ORFs
|
|
@@ -8,28 +8,35 @@ except ImportError:
|
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def MetaGene(*args):
    """Parse a MetaGene prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'MetaGene']``.

    Rows are split on whitespace; a row is used when it has at least six
    fields and one of them is exactly ``-`` or ``+``. The first three fields
    are read as start, stop and strand.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    metaGene_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = metaGene_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as MetaGene_input:
            for line in MetaGene_input:
                line = line.split()
                # NOTE(review): the first field is the integer start
                # coordinate, so it cannot be matched against a region
                # identifier; rows are therefore accepted for every region.
                # Confirm how multi-record MetaGene output should be
                # attributed to regions.
                if len(line) >= 6 and ("-" in line or '+' in line):
                    start = int(line[0])
                    stop = int(line[1])
                    strand = line[2]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # The +/- token was in another column: no usable strand.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGene']

    for group in metaGene_ORFs:
        metaGene_ORFs[group] = sortORFs(metaGene_ORFs[group])
    return metaGene_ORFs
|
|
@@ -8,29 +8,36 @@ except ImportError:
|
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def MetaGeneAnnotator(*args):
    """Parse a MetaGeneAnnotator prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'MetaGeneAnnotator']``.

    Rows are split on whitespace; a row is used when it has exactly eleven
    fields and its first field contains both the region identifier and
    ``gene_``. Rows whose strand field contains neither ``+`` nor ``-`` are
    skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    metaGeneAnnotator_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = metaGeneAnnotator_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as MetaGeneAnnotator_input:
            for line in MetaGeneAnnotator_input:
                line = line.split()
                # NOTE(review): both tests inspect the first field; confirm
                # that gene rows really carry the region identifier there,
                # otherwise no row can ever match.
                if len(line) == 11 and dna_region in line[0] and "gene_" in line[0]:
                    start = int(line[1])
                    stop = int(line[2])
                    strand = line[3]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneAnnotator']

    for group in metaGeneAnnotator_ORFs:
        metaGeneAnnotator_ORFs[group] = sortORFs(metaGeneAnnotator_ORFs[group])
    return metaGeneAnnotator_ORFs
|
|
@@ -8,29 +8,36 @@ except ImportError:
|
|
|
8
8
|
from ORForise.utils import sortORFs
|
|
9
9
|
|
|
10
10
|
|
|
11
|
-
def MetaGeneMark(*args):
    """Parse a MetaGeneMark prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'MetaGeneMark']``.

    Rows are split on whitespace; a row is used when it has exactly nineteen
    fields, its fifth field contains ``GeneMark.hmm``, its sixth contains
    ``CDS`` and its first contains the region identifier. Rows whose strand
    field contains neither ``+`` nor ``-`` are skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    metaGeneMarkORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = metaGeneMarkORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as metaGeneMark_input:
            for line in metaGeneMark_input:
                line = line.split()
                if len(line) != 19:
                    continue
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if 'GeneMark.hmm' in line[4] and "CDS" in line[5] and dna_region in line[0]:
                    start = int(line[6])
                    stop = int(line[7])
                    strand = line[9]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneMark']

    for group in metaGeneMarkORFs:
        metaGeneMarkORFs[group] = sortORFs(metaGeneMarkORFs[group])
    return metaGeneMarkORFs
|
|
@@ -10,30 +10,34 @@ except ImportError:
|
|
|
10
10
|
|
|
11
11
|
def Prodigal(*args):
    """Parse a Prodigal prediction file into per-region ORF dictionaries.

    Positional arguments (taken from ``*args``):
        args[0]: path of the prediction file.
        args[1]: mapping of region identifier -> indexable whose element
            ``[0]`` is the region's sequence.

    Returns:
        An ``OrderedDict`` keyed by region identifier. Each value is the
        result of ``sortORFs`` applied to an ``OrderedDict`` that maps
        ``"start,stop"`` to
        ``[strand, startCodon, stopCodon, 'CDS', 'Prodigal']``.

    Rows are split on whitespace; a row is used when it has at least nine
    fields, its second field contains ``Prodigal``, its first contains the
    region identifier and its third contains ``CDS``. Rows whose strand
    field contains neither ``+`` nor ``-`` are skipped.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    prodigal_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        region_ORFs = prodigal_ORFs[dna_region] = collections.OrderedDict()
        genome = dna_regions[dna_region][0]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as prodigal_input:
            for line in prodigal_input:
                line = line.split()
                # Blank and short lines (headers, comments) would otherwise
                # raise IndexError on the field lookups below.
                if len(line) < 9:
                    continue
                # NOTE(review): the region test is a substring match, so an
                # identifier that is a prefix of another also matches it.
                if "Prodigal" in line[1] and dna_region in line[0] and "CDS" in line[2]:
                    start = int(line[3])
                    stop = int(line[4])
                    strand = line[6]
                    if '-' in strand:
                        # Reverse complement: coordinates are mirrored onto
                        # the reverse-complemented sequence.
                        r_start = genome_size - stop
                        r_stop = genome_size - start
                        startCodon = genome_rev[r_start:r_start + 3]
                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                    elif '+' in strand:
                        startCodon = genome[start - 1:start + 2]
                        stopCodon = genome[stop - 3:stop]
                    else:
                        # No usable strand: codons cannot be derived.
                        continue
                    po = str(start) + ',' + str(stop)
                    region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'Prodigal']

    for group in prodigal_ORFs:
        prodigal_ORFs[group] = sortORFs(prodigal_ORFs[group])
    return prodigal_ORFs
|