ORForise 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +378 -0
- ORForise/Annotation_Compare.py +317 -0
- ORForise/Annotation_Intersector.py +726 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +53 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/StORF_Undetected.py +35 -0
- ORForise/Aux/StORF_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +46 -0
- ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
- ORForise/Aux/TabToGFF/__init__.py +0 -0
- ORForise/Aux/__init__.py +0 -0
- ORForise/Comparator.py +882 -0
- ORForise/Convert_To_GFF.py +141 -0
- ORForise/GFF_Adder.py +543 -0
- ORForise/List_Tools.py +56 -0
- ORForise/ORForise_Analysis/__init__.py +0 -0
- ORForise/ORForise_Analysis/cds_checker.py +77 -0
- ORForise/ORForise_Analysis/gene_Lenghts.py +28 -0
- ORForise/ORForise_Analysis/genome_Metrics.py +258 -0
- ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +88 -0
- ORForise/ORForise_Analysis/missed_Gene_Metrics.py +277 -0
- ORForise/ORForise_Analysis/parital_Match_Analysis.py +230 -0
- ORForise/ORForise_Analysis/result_File_Analysis.py +286 -0
- ORForise/ORForise_Analysis/start_Codon_Substitution.py +161 -0
- ORForise/StORForise.py +115 -0
- ORForise/Tools/Augustus/Augustus.py +54 -0
- ORForise/Tools/Augustus/__init__.py +0 -0
- ORForise/Tools/Balrog/Balrog.py +56 -0
- ORForise/Tools/Balrog/__init__.py +0 -0
- ORForise/Tools/EasyGene/EasyGene.py +55 -0
- ORForise/Tools/EasyGene/__init__.py +0 -0
- ORForise/Tools/FGENESB/FGENESB.py +57 -0
- ORForise/Tools/FGENESB/__init__.py +0 -0
- ORForise/Tools/FragGeneScan/FragGeneScan.py +54 -0
- ORForise/Tools/FragGeneScan/__init__.py +0 -0
- ORForise/Tools/GFF/GFF.py +77 -0
- ORForise/Tools/GFF/__init__.py +0 -0
- ORForise/Tools/GLIMMER3/GLIMMER3.py +59 -0
- ORForise/Tools/GLIMMER3/__init__.py +0 -0
- ORForise/Tools/GeneMark/GeneMark.py +135 -0
- ORForise/Tools/GeneMark/__init__.py +0 -0
- ORForise/Tools/GeneMarkHA/GeneMarkHA.py +54 -0
- ORForise/Tools/GeneMarkHA/__init__.py +0 -0
- ORForise/Tools/GeneMarkHMM/GeneMarkHMM.py +55 -0
- ORForise/Tools/GeneMarkHMM/__init__.py +0 -0
- ORForise/Tools/GeneMarkS/GeneMarkS.py +54 -0
- ORForise/Tools/GeneMarkS/__init__.py +0 -0
- ORForise/Tools/GeneMarkS2/GeneMarkS2.py +55 -0
- ORForise/Tools/GeneMarkS2/__init__.py +0 -0
- ORForise/Tools/MetaGene/MetaGene.py +54 -0
- ORForise/Tools/MetaGene/__init__.py +0 -0
- ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +55 -0
- ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- ORForise/Tools/MetaGeneMark/MetaGeneMark.py +55 -0
- ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- ORForise/Tools/Prodigal/Prodigal.py +55 -0
- ORForise/Tools/Prodigal/__init__.py +0 -0
- ORForise/Tools/Prokka/Prokka.py +57 -0
- ORForise/Tools/Prokka/__init__.py +0 -0
- ORForise/Tools/StORF-Reporter/StORF-Reporter.py +56 -0
- ORForise/Tools/StORF-Reporter/__init__.py +0 -0
- ORForise/Tools/TransDecoder/TransDecoder.py +54 -0
- ORForise/Tools/TransDecoder/__init__.py +0 -0
- ORForise/Tools/__init__.py +0 -0
- ORForise/__init__.py +0 -0
- ORForise/utils.py +236 -0
- orforise-1.6.2.dist-info/METADATA +1038 -0
- orforise-1.6.2.dist-info/RECORD +73 -0
- orforise-1.6.2.dist-info/WHEEL +5 -0
- orforise-1.6.2.dist-info/entry_points.txt +15 -0
- orforise-1.6.2.dist-info/licenses/LICENSE +624 -0
- orforise-1.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
def GeneMark(*args):
    """Parse a GeneMark prediction table into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the GeneMark output file.
        args[1]: mapping of region name to sequence data. When it is empty the
            file is only scanned and a mapping of discovered keys is returned.

    Rows are whitespace-split. Field 0 is read as start, field 1 as stop and
    field 2 as the strand label ('direct' or 'complement'). Within a run of
    rows, only the first row of each set sharing a start (complement) or a
    stop (direct) is kept; a blank line resets that bookkeeping.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of every
        'direct'/'complement' row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping every region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'GeneMark']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as GeneMark_input:
            for line in GeneMark_input:
                line = line.split()
                # Blank and short header lines have no field 2; skip them
                # instead of raising IndexError.
                if len(line) < 3:
                    continue
                is_strand_row = 'direct' in line[2] or 'complement' in line[2]
                # NOTE(review): field 0 is the value the main pass parses as the
                # integer start, so these keys are coordinates rather than
                # sequence names - confirm this is what callers expect.
                if is_strand_row and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    geneMark_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        geneMark_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = geneMark_ORFs[dna_region]
        prev_Start = 0
        prev_Stop = 0
        started = False
        with open(tool_pred, 'r') as GeneMark_input:
            for line in GeneMark_input:
                line = line.split()
                if len(line) == 7:
                    started = True
                    strand = line[2]
                    # Strange Output requires strange code - We select the Longest ORF from each set
                    # The rows carry no sequence name (field 0 is parsed as the
                    # integer start below). The former
                    # `... or 'complement' in line[2] and dna_region in line[0]`
                    # test therefore only suppressed complement rows through
                    # operator precedence; both strands are now treated alike.
                    if 'complement' not in strand and 'direct' not in strand:
                        continue
                    start = int(line[0])
                    stop = int(line[1])
                    if 'complement' in strand:  # Reverse complement starts and stops adjusted
                        if start != prev_Start:
                            r_start = genome_size - stop
                            r_stop = genome_size - start
                            startCodon = genome_rev[r_start:r_start + 3]
                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                            po = str(start) + ',' + str(stop)
                            # Store under the current region so that sortORFs
                            # receives a dict of ORFs, not a single ORF list.
                            region_ORFs[po] = ['-', startCodon, stopCodon, 'CDS', 'GeneMark']
                    else:  # 'direct'
                        if stop != prev_Stop:
                            # start/stop are treated as 1-based inclusive positions.
                            startCodon = genome[start - 1:start + 2]
                            stopCodon = genome[stop - 3:stop]
                            po = str(start) + ',' + str(stop)
                            region_ORFs[po] = ['+', startCodon, stopCodon, 'CDS', 'GeneMark']
                    prev_Start = start
                    prev_Stop = stop
                elif not line and started:
                    # A blank line ends the current set of alternative ORFs.
                    prev_Stop = 0
                    prev_Start = 0

    for group in geneMark_ORFs:
        geneMark_ORFs[group] = sortORFs(geneMark_ORFs[group])
    return geneMark_ORFs
|
|
71
|
+
|
|
72
|
+
############# This section can be used to select the ORF with highest probability score.
|
|
73
|
+
# with open('Tools/GeneMark/' + input_to_analyse, 'r') as GeneMark_input:
|
|
74
|
+
# prob_score = 0
|
|
75
|
+
# started = False
|
|
76
|
+
#
|
|
77
|
+
# for line in GeneMark_input:
|
|
78
|
+
# line = line.split()
|
|
79
|
+
#
|
|
80
|
+
# if len(line) == 7:
|
|
81
|
+
# if 'direct' in line[2] or 'complement' in line[2] and '....' not in line[6] : # Strange Output requires strange code
|
|
82
|
+
# started = True
|
|
83
|
+
# start = int(line[0])
|
|
84
|
+
# stop = int(line[1])
|
|
85
|
+
# score = float(line[5])
|
|
86
|
+
# strand = line[2]
|
|
87
|
+
# if 'complement' in strand: # Reverse complement starts and stops to conform to our definition
|
|
88
|
+
# if start != prev_Start:
|
|
89
|
+
# prob_score = score
|
|
90
|
+
# # Switched to match Sense Strand
|
|
91
|
+
# r_start = genome_size - stop
|
|
92
|
+
# r_stop = genome_size - start
|
|
93
|
+
# strand = '-'
|
|
94
|
+
# startCodon = genome_rev[r_start:r_start + 3]
|
|
95
|
+
# stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
96
|
+
# po = str(start) + ',' + str(stop)
|
|
97
|
+
# orf = [strand, startCodon, stopCodon]
|
|
98
|
+
# elif start == prev_Start and score > prob_score:
|
|
99
|
+
# # Switched to match Sense Strand
|
|
100
|
+
# prob_score = score
|
|
101
|
+
# r_start = genome_size - stop
|
|
102
|
+
# r_stop = genome_size - start
|
|
103
|
+
# strand = '-'
|
|
104
|
+
# startCodon = genome_rev[r_start:r_start + 3]
|
|
105
|
+
# stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
106
|
+
# po = str(start) + ',' + str(stop)
|
|
107
|
+
# orf = [strand, startCodon, stopCodon]
|
|
108
|
+
# elif 'direct' in strand:
|
|
109
|
+
# if stop != prev_Stop:
|
|
110
|
+
# prob_score = score
|
|
111
|
+
# startCodon = genome[start - 1:start - 1 + 3]
|
|
112
|
+
# stopCodon = genome[stop - 3:stop - 1 + 1]
|
|
113
|
+
# strand = '+'
|
|
114
|
+
# po = str(start) + ',' + str(stop)
|
|
115
|
+
# orf = [strand, startCodon, stopCodon]
|
|
116
|
+
# elif stop == prev_Stop and score > prob_score:
|
|
117
|
+
# prob_score = score
|
|
118
|
+
# startCodon = genome[start - 1:start - 1 + 3]
|
|
119
|
+
# stopCodon = genome[stop - 3:stop - 1 + 1]
|
|
120
|
+
# strand = '+'
|
|
121
|
+
# po = str(start) + ',' + str(stop)
|
|
122
|
+
# orf = [strand, startCodon, stopCodon]
|
|
123
|
+
# prev_Start = start
|
|
124
|
+
# prev_Stop = stop
|
|
125
|
+
# elif len(line) == 0 and started == True:
|
|
126
|
+
# prob_score = 0
|
|
127
|
+
# prev_Start = 0
|
|
128
|
+
# prev_Stop = 0
|
|
129
|
+
# GeneMark_ORFs.update({po: orf})
|
|
130
|
+
# po = ''
|
|
131
|
+
# orf = []
|
|
132
|
+
# #Remove last empty dict
|
|
133
|
+
# del GeneMark_ORFs['']
|
|
134
|
+
# print(GeneMark_ORFs)
|
|
135
|
+
# return GeneMark_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def GeneMark_HA(*args):
    """Parse GeneMark-HA predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    Rows are whitespace-split. A row is used when field 5 contains "CDS";
    field 6 is read as start, field 7 as stop and field 9 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        CDS row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'GeneMarkHA']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as GeneMarkHA_input:
            for line in GeneMarkHA_input:
                line = line.split()
                if len(line) >= 9 and "CDS" in line[5] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    geneMarkHA_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        geneMarkHA_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = geneMarkHA_ORFs[dna_region]
        with open(tool_pred, 'r') as GeneMarkHA_input:
            for line in GeneMarkHA_input:
                line = line.split()
                # Field 9 (strand) is read below, so ten fields are required;
                # the previous `>= 9` guard allowed an IndexError.
                if not (len(line) >= 10 and "CDS" in line[5] and dna_region in line[0]):
                    continue
                start = int(line[6])
                stop = int(line[7])
                strand = line[9]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMarkHA']

    for group in geneMarkHA_ORFs:
        geneMarkHA_ORFs[group] = sortORFs(geneMarkHA_ORFs[group])
    return geneMarkHA_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
def GeneMark_HMM(*args):
    """Parse GeneMark.hmm GFF-style predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    A row is used when it has at least nine fields and field 2 contains
    "CDS"; field 3 is read as start, field 4 as stop and field 6 as strand.
    The discovery pass splits on any whitespace, the main pass on tabs.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        CDS row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'GeneMarkHMM']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as GeneMarkHMM_input:
            for line in GeneMarkHMM_input:
                line = line.split()
                if len(line) >= 9 and "CDS" in line[2] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    geneMarkHMM_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        geneMarkHMM_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = geneMarkHMM_ORFs[dna_region]
        with open(tool_pred, 'r') as GeneMarkHMM_input:
            for line in GeneMarkHMM_input:
                line = line.split('\t')
                if not (len(line) >= 9 and "CDS" in line[2] and dna_region in line[0]):
                    continue
                start = int(line[3])
                stop = int(line[4])
                strand = line[6]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMarkHMM']

    for group in geneMarkHMM_ORFs:
        geneMarkHMM_ORFs[group] = sortORFs(geneMarkHMM_ORFs[group])
    return geneMarkHMM_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def GeneMark_S(*args):
    """Parse GeneMarkS predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    Rows are whitespace-split. A row is used when field 5 contains "CDS";
    field 6 is read as start, field 7 as stop and field 9 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        CDS row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'GeneMarkS']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as GeneMarkS_input:
            for line in GeneMarkS_input:
                line = line.split()
                if len(line) >= 9 and "CDS" in line[5] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    geneMarkS_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        geneMarkS_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = geneMarkS_ORFs[dna_region]
        with open(tool_pred, 'r') as GeneMarkS_input:
            for line in GeneMarkS_input:
                line = line.split()
                # Field 9 (strand) is read below, so ten fields are required;
                # the previous `>= 9` guard allowed an IndexError.
                if not (len(line) >= 10 and "CDS" in line[5] and dna_region in line[0]):
                    continue
                start = int(line[6])
                stop = int(line[7])
                strand = line[9]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMarkS']

    for group in geneMarkS_ORFs:
        geneMarkS_ORFs[group] = sortORFs(geneMarkS_ORFs[group])
    return geneMarkS_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def GeneMarkS2(*args):
    """Parse GeneMarkS-2 GFF-style predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    A row is used when it has at least nine fields and field 2 contains
    "CDS"; field 3 is read as start, field 4 as stop and field 6 as strand.
    The discovery pass splits on any whitespace, the main pass on tabs.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        CDS row to an empty placeholder list.
        Otherwise: a ``defaultdict`` (created without a default factory)
        mapping each region name to ``sortORFs`` of its ``{"start,stop":
        [strand, startCodon, stopCodon, 'CDS', 'GeneMarkS2']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as GeneMarkS2_input:
            for line in GeneMarkS2_input:
                line = line.split()
                if len(line) >= 9 and "CDS" in line[2] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    # Kept as a defaultdict so the returned type is unchanged for callers.
    geneMarkS2_ORFs = collections.defaultdict()
    for dna_region in dna_regions:
        geneMarkS2_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = geneMarkS2_ORFs[dna_region]
        with open(tool_pred, 'r') as GeneMarkS2_input:
            for line in GeneMarkS2_input:
                line = line.split('\t')
                if not (len(line) >= 9 and dna_region in line[0] and "CDS" in line[2]):
                    continue
                start = int(line[3])
                stop = int(line[4])
                strand = line[6]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'GeneMarkS2']

    for group in geneMarkS2_ORFs:
        geneMarkS2_ORFs[group] = sortORFs(geneMarkS2_ORFs[group])
    return geneMarkS2_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def MetaGene(*args):
    """Parse MetaGene predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for keys.

    Rows are whitespace-split. A row is considered when it has at least six
    fields and one field is exactly "-" or "+"; field 0 is read as start,
    field 1 as stop and field 2 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        such row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'MetaGene']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as MetaGene_input:
            for line in MetaGene_input:
                line = line.split()
                if len(line) >= 6 and ("-" in line or '+' in line) and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    metaGene_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        metaGene_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = metaGene_ORFs[dna_region]
        with open(tool_pred, 'r') as MetaGene_input:
            for line in MetaGene_input:
                line = line.split()
                # NOTE(review): field 0 is parsed as the integer start below, so
                # `dna_region in line[0]` compares the region name with a
                # coordinate string - confirm the intended region test.
                if not (len(line) >= 6 and ("-" in line or '+' in line) and dna_region in line[0]):
                    continue
                start = int(line[0])
                stop = int(line[1])
                strand = line[2]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # The "+"/"-" token was in another field: the codons would
                    # be undefined or left over from the previous row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGene']

    for group in metaGene_ORFs:
        metaGene_ORFs[group] = sortORFs(metaGene_ORFs[group])
    return metaGene_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def MetaGeneAnnotator(*args):
    """Parse MetaGeneAnnotator predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for keys.

    Rows are whitespace-split and must have exactly eleven fields. In the
    main pass field 0 must contain "gene_"; field 1 is read as start, field 2
    as stop and field 3 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        eleven-field row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'MetaGeneAnnotator']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as MetaGeneAnnotator_input:
            for line in MetaGeneAnnotator_input:
                line = line.split()
                if len(line) == 11 and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    metaGeneAnnotator_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        metaGeneAnnotator_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = metaGeneAnnotator_ORFs[dna_region]
        with open(tool_pred, 'r') as MetaGeneAnnotator_input:
            for line in MetaGeneAnnotator_input:
                line = line.split()
                # NOTE(review): field 0 must also contain "gene_", so the region
                # name is being matched against the gene identifier - confirm
                # the intended region test.
                if not (len(line) == 11 and dna_region in line[0]):
                    continue
                if "gene_" not in line[0]:
                    continue
                start = int(line[1])
                stop = int(line[2])
                strand = line[3]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneAnnotator']

    for group in metaGeneAnnotator_ORFs:
        metaGeneAnnotator_ORFs[group] = sortORFs(metaGeneAnnotator_ORFs[group])
    return metaGeneAnnotator_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def MetaGeneMark(*args):
    """Parse MetaGeneMark predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    Rows are whitespace-split. A row is used when field 4 contains
    "GeneMark.hmm" and field 5 contains "CDS"; the main pass additionally
    requires exactly nineteen fields and reads field 6 as start, field 7 as
    stop and field 9 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        matching row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'MetaGeneMark']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as MetaGeneMark_input:
            for line in MetaGeneMark_input:
                line = line.split()
                # Fields 4 and 5 are inspected, so blank/short lines are
                # skipped instead of raising IndexError.
                if len(line) < 6:
                    continue
                if 'GeneMark.hmm' in line[4] and "CDS" in line[5] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    metaGeneMarkORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        metaGeneMarkORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = metaGeneMarkORFs[dna_region]
        with open(tool_pred, 'r') as metaGeneMark_input:
            for line in metaGeneMark_input:
                line = line.split()
                if len(line) != 19:
                    continue
                if not ('GeneMark.hmm' in line[4] and "CDS" in line[5] and dna_region in line[0]):
                    continue
                start = int(line[6])
                stop = int(line[7])
                strand = line[9]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneMark']

    for group in metaGeneMarkORFs:
        metaGeneMarkORFs[group] = sortORFs(metaGeneMarkORFs[group])
    return metaGeneMarkORFs
|
|
File without changes
|
|
@@ -0,0 +1,55 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def Prodigal(*args):
    """Parse Prodigal GFF predictions into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the prediction file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.

    Rows are whitespace-split. A row is used when field 1 contains
    "Prodigal" and field 2 contains "CDS"; field 3 is read as start, field 4
    as stop and field 6 as strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        matching row to an empty placeholder list.
        Otherwise: an ``OrderedDict`` mapping each region name to ``sortORFs``
        of its ``{"start,stop": [strand, startCodon, stopCodon, 'CDS',
        'Prodigal']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as Prodigal_input:
            for line in Prodigal_input:
                line = line.split()
                # Fields 1 and 2 are inspected, so blank/short lines are
                # skipped instead of raising IndexError.
                if len(line) < 3:
                    continue
                if "Prodigal" in line[1] and "CDS" in line[2] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    prodigal_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        prodigal_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = prodigal_ORFs[dna_region]
        with open(tool_pred, 'r') as prodigal_input:
            for line in prodigal_input:
                line = line.split()
                # The original read fields up to index 8 unguarded; rows with
                # fewer than nine fields are now skipped rather than raising.
                if len(line) < 9:
                    continue
                if not ("Prodigal" in line[1] and dna_region in line[0] and "CDS" in line[2]):
                    continue
                start = int(line[3])
                stop = int(line[4])
                strand = line[6]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                region_ORFs[po] = [strand, startCodon, stopCodon, 'CDS', 'Prodigal']

    for group in prodigal_ORFs:
        prodigal_ORFs[group] = sortORFs(prodigal_ORFs[group])
    return prodigal_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def Prokka(*args):  # UNFINISHED
    """Parse a Prokka GFF file into per-region ORF dictionaries.

    Positional arguments:
        args[0]: path of the Prokka GFF file.
        args[1]: mapping of region name to sequence data; when empty, the file
            is only scanned for region names.
        args[2]: read into ``types`` but not used by this function yet.

    Discovery pass (empty ``args[1]``): rows are whitespace-split and a region
    is registered when field 1 contains "Prodigal" and field 2 contains "CDS".
    Main pass: lines containing '#' are ignored, rows are tab-split and used
    when field 1 does not contain "prokka", field 8 starts with "ID=" and
    field 2 contains "CDS"; field 3 is start, field 4 stop, field 6 strand.

    Returns:
        With an empty ``args[1]``: an ``OrderedDict`` mapping field 0 of each
        matching row to an empty placeholder list.
        Otherwise: a ``defaultdict(list)`` mapping each region name to
        ``sortORFs`` of its ``{"start,stop": [strand, startCodon, stopCodon,
        <field 2>, 'Prokka']}`` dictionary.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    types = args[2]  # not used yet; kept so the three-argument call contract is unchanged
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as PROKKA_input:
            for line in PROKKA_input:
                line = line.split()
                # Fields 1 and 2 are inspected, so blank/short lines (e.g. a
                # one-token line) are skipped instead of raising IndexError.
                if len(line) < 3:
                    continue
                if "Prodigal" in line[1] and "CDS" in line[2] and line[0] not in dna_regions:
                    dna_regions[line[0]] = []  # Placeholder for genome sequence
        return dna_regions

    # Kept as defaultdict(list) so the returned type is unchanged for callers.
    prokkaORFs = collections.defaultdict(list)
    for dna_region in dna_regions:
        prokkaORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:  # e.g. an empty placeholder list: use the value itself
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)  # presumably the reverse complement of genome
        region_ORFs = prokkaORFs[dna_region]
        with open(tool_pred, 'r') as prokka_input:
            for line in prokka_input:
                if '#' in line:
                    continue
                line = line.split('\t')
                # Fields up to index 8 are read below; lines without nine
                # tab-separated fields are skipped instead of raising
                # IndexError.
                if len(line) < 9:
                    continue
                if not ("prokka" not in line[1] and line[8].startswith('ID=')
                        and dna_region in line[0] and "CDS" in line[2]):
                    continue
                start = int(line[3])
                stop = int(line[4])
                strand = line[6]
                if '-' in strand:  # Reverse complement starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # start/stop are treated as 1-based inclusive positions.
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # No usable strand: the codons would be undefined or left
                    # over from the previous row, so skip the row.
                    continue
                po = str(start) + ',' + str(stop)
                # Store under the current region so that sortORFs receives a
                # dict of ORFs, not a single ORF list.
                region_ORFs[po] = [strand, startCodon, stopCodon, line[2], 'Prokka']

    for group in prokkaORFs:
        prokkaORFs[group] = sortORFs(prokkaORFs[group])
    return prokkaORFs
|
|
File without changes
|