ORForise 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +378 -0
- ORForise/Annotation_Compare.py +317 -0
- ORForise/Annotation_Intersector.py +726 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +53 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/StORF_Undetected.py +35 -0
- ORForise/Aux/StORF_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +46 -0
- ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
- ORForise/Aux/TabToGFF/__init__.py +0 -0
- ORForise/Aux/__init__.py +0 -0
- ORForise/Comparator.py +882 -0
- ORForise/Convert_To_GFF.py +141 -0
- ORForise/GFF_Adder.py +543 -0
- ORForise/List_Tools.py +56 -0
- ORForise/ORForise_Analysis/__init__.py +0 -0
- ORForise/ORForise_Analysis/cds_checker.py +77 -0
- ORForise/ORForise_Analysis/gene_Lenghts.py +28 -0
- ORForise/ORForise_Analysis/genome_Metrics.py +258 -0
- ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +88 -0
- ORForise/ORForise_Analysis/missed_Gene_Metrics.py +277 -0
- ORForise/ORForise_Analysis/parital_Match_Analysis.py +230 -0
- ORForise/ORForise_Analysis/result_File_Analysis.py +286 -0
- ORForise/ORForise_Analysis/start_Codon_Substitution.py +161 -0
- ORForise/StORForise.py +115 -0
- ORForise/Tools/Augustus/Augustus.py +54 -0
- ORForise/Tools/Augustus/__init__.py +0 -0
- ORForise/Tools/Balrog/Balrog.py +56 -0
- ORForise/Tools/Balrog/__init__.py +0 -0
- ORForise/Tools/EasyGene/EasyGene.py +55 -0
- ORForise/Tools/EasyGene/__init__.py +0 -0
- ORForise/Tools/FGENESB/FGENESB.py +57 -0
- ORForise/Tools/FGENESB/__init__.py +0 -0
- ORForise/Tools/FragGeneScan/FragGeneScan.py +54 -0
- ORForise/Tools/FragGeneScan/__init__.py +0 -0
- ORForise/Tools/GFF/GFF.py +77 -0
- ORForise/Tools/GFF/__init__.py +0 -0
- ORForise/Tools/GLIMMER3/GLIMMER3.py +59 -0
- ORForise/Tools/GLIMMER3/__init__.py +0 -0
- ORForise/Tools/GeneMark/GeneMark.py +135 -0
- ORForise/Tools/GeneMark/__init__.py +0 -0
- ORForise/Tools/GeneMarkHA/GeneMarkHA.py +54 -0
- ORForise/Tools/GeneMarkHA/__init__.py +0 -0
- ORForise/Tools/GeneMarkHMM/GeneMarkHMM.py +55 -0
- ORForise/Tools/GeneMarkHMM/__init__.py +0 -0
- ORForise/Tools/GeneMarkS/GeneMarkS.py +54 -0
- ORForise/Tools/GeneMarkS/__init__.py +0 -0
- ORForise/Tools/GeneMarkS2/GeneMarkS2.py +55 -0
- ORForise/Tools/GeneMarkS2/__init__.py +0 -0
- ORForise/Tools/MetaGene/MetaGene.py +54 -0
- ORForise/Tools/MetaGene/__init__.py +0 -0
- ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +55 -0
- ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- ORForise/Tools/MetaGeneMark/MetaGeneMark.py +55 -0
- ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- ORForise/Tools/Prodigal/Prodigal.py +55 -0
- ORForise/Tools/Prodigal/__init__.py +0 -0
- ORForise/Tools/Prokka/Prokka.py +57 -0
- ORForise/Tools/Prokka/__init__.py +0 -0
- ORForise/Tools/StORF-Reporter/StORF-Reporter.py +56 -0
- ORForise/Tools/StORF-Reporter/__init__.py +0 -0
- ORForise/Tools/TransDecoder/TransDecoder.py +54 -0
- ORForise/Tools/TransDecoder/__init__.py +0 -0
- ORForise/Tools/__init__.py +0 -0
- ORForise/__init__.py +0 -0
- ORForise/utils.py +236 -0
- orforise-1.6.2.dist-info/METADATA +1038 -0
- orforise-1.6.2.dist-info/RECORD +73 -0
- orforise-1.6.2.dist-info/WHEEL +5 -0
- orforise-1.6.2.dist-info/entry_points.txt +15 -0
- orforise-1.6.2.dist-info/licenses/LICENSE +624 -0
- orforise-1.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,56 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def _is_storf_source(source):
    """Return True when a GFF source column names a StORF-Reporter prediction."""
    # 'StORF' also matches 'StORF-Reporter'; 'StoRF_Reporter' is the alternative
    # spelling the original condition accepted.
    return 'StORF' in source or 'StoRF_Reporter' in source


def StORF_Reporter(*args):
    """Load StORF-Reporter predictions from a GFF file.

    Positional arguments:
        args[0]: path to the StORF-Reporter GFF prediction file.
        args[1]: mapping of DNA region id -> region data. The sequence is read
            from element 0 of the value, or the value itself is used when
            indexing element 0 raises IndexError.

    Returns:
        When args[1] is empty: an OrderedDict mapping every region id that has
        at least one StORF prediction to an empty list (sequence placeholder).
        Otherwise: an OrderedDict mapping each region id to an OrderedDict of
        'start,stop' -> [strand, startCodon, stopCodon, 'CDS', 'StORF-Reporter'],
        ordered by sortORFs.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as storf_input:
            for line in storf_input:
                if line.startswith('#'):
                    continue  # comment/header lines are not predictions
                fields = line.split()
                if len(fields) < 2:
                    continue  # blank or truncated line - no source column to inspect
                if _is_storf_source(fields[1]) and fields[0] not in dna_regions:
                    dna_regions[fields[0]] = []  # Placeholder for genome sequence
        return dna_regions

    storf_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        storf_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as storf_input:
            for line in storf_input:
                if line.startswith('#') or line.startswith('\n'):
                    continue
                fields = line.split()
                if len(fields) < 7:
                    continue  # need columns up to and including the strand
                # Only keep predictions that belong to the region being processed.
                if not (_is_storf_source(fields[1]) and fields[0] == dna_region):
                    continue
                start = int(fields[3])
                stop = int(fields[4])
                strand = fields[6]
                if '-' in strand:  # Reverse complement: starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    # NOTE(review): this slice starts at `start`, not `start - 1` as the
                    # other tool parsers do - confirm this is intended for StORF coordinates.
                    startCodon = genome[start:start + 3]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # Unknown strand: record the locus without reusing codons
                    # left over from a previous line.
                    startCodon = ''
                    stopCodon = ''
                po = str(start) + ',' + str(stop)
                orf = [strand, startCodon, stopCodon, 'CDS', 'StORF-Reporter']  # StORF/Con-StORF or CDS??
                # Store under the region so the per-region sort below receives a dict.
                storf_ORFs[dna_region].update({po: orf})

    for group in storf_ORFs:
        storf_ORFs[group] = sortORFs(storf_ORFs[group])
    return storf_ORFs
|
|
File without changes
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def TransDecoder(*args):
    """Load TransDecoder CDS predictions from a GFF file.

    Positional arguments:
        args[0]: path to the TransDecoder GFF prediction file.
        args[1]: mapping of DNA region id -> region data. The sequence is read
            from element 0 of the value, or the value itself is used when
            indexing element 0 raises IndexError.

    Only lines that split into exactly nine whitespace-separated fields, whose
    source column contains "transdecoder" and whose type column contains "CDS",
    are considered.

    Returns:
        When args[1] is empty: an OrderedDict mapping every region id that has
        at least one matching prediction to an empty list (sequence placeholder).
        Otherwise: an OrderedDict mapping each region id to an OrderedDict of
        'start,stop' -> [strand, startCodon, stopCodon, 'CDS', 'TransDecoder'],
        ordered by sortORFs.
    """
    tool_pred = args[0]
    dna_regions = args[1]
    if not dna_regions:  # This triggers if dna_regions is an empty dict (GFF_Intersect passed nothing)
        dna_regions = collections.OrderedDict()
        with open(tool_pred, 'r') as transdecoder_input:
            for line in transdecoder_input:
                fields = line.split()
                if (len(fields) == 9 and "transdecoder" in fields[1] and "CDS" in fields[2]
                        and fields[0] not in dna_regions):
                    dna_regions[fields[0]] = []  # Placeholder for genome sequence
        return dna_regions

    transDecoder_ORFs = collections.OrderedDict()
    for dna_region in dna_regions:
        transDecoder_ORFs[dna_region] = collections.OrderedDict()
    for dna_region in dna_regions:
        try:
            genome = dna_regions[dna_region][0]
        except IndexError:
            genome = dna_regions[dna_region]
        genome_size = len(genome)
        genome_rev = revCompIterative(genome)
        with open(tool_pred, 'r') as transdecoder_input:
            for line in transdecoder_input:
                fields = line.split()
                if not (len(fields) == 9 and "transdecoder" in fields[1] and "CDS" in fields[2]):
                    continue
                # Exact match: a substring test would let 'contig1' claim 'contig10'.
                if fields[0] != dna_region:
                    continue
                start = int(fields[3])
                stop = int(fields[4])
                strand = fields[6]
                if '-' in strand:  # Reverse complement: starts and stops adjusted
                    r_start = genome_size - stop
                    r_stop = genome_size - start
                    startCodon = genome_rev[r_start:r_start + 3]
                    stopCodon = genome_rev[r_stop - 2:r_stop + 1]
                elif '+' in strand:
                    startCodon = genome[start - 1:start + 2]
                    stopCodon = genome[stop - 3:stop]
                else:
                    # Unknown strand: record the locus without reusing codons
                    # left over from a previous line.
                    startCodon = ''
                    stopCodon = ''
                po = str(start) + ',' + str(stop)
                orf = [strand, startCodon, stopCodon, 'CDS', 'TransDecoder']
                # Store under the region so the per-region sort below receives a dict.
                transDecoder_ORFs[dna_region].update({po: orf})

    for group in transDecoder_ORFs:
        transDecoder_ORFs[group] = sortORFs(transDecoder_ORFs[group])
    return transDecoder_ORFs
|
|
File without changes
|
|
File without changes
|
ORForise/__init__.py
ADDED
|
File without changes
|
ORForise/utils.py
ADDED
|
@@ -0,0 +1,236 @@
|
|
|
1
|
+
#!/usr/bin/env python3
|
|
2
|
+
import collections
|
|
3
|
+
|
|
4
|
+
# Constants
SHORT_ORF_LENGTH = 300  # presumably a length threshold in nucleotides - verify against the comparator
MIN_COVERAGE = 75  # presumably a percentage - verify against the comparator
# Keep in step with the packaged distribution version (this module ships in the 1.6.2 wheel).
ORForise_Version = 'v1.6.2'
WELCOME = ("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
           "Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
           "#####")
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def revCompIterative(watson):
    """Return the upper-cased reverse complement of the sequence *watson*.

    The bases A/T/C/G and the ambiguity codes R/Y/K/M/V/B/H/D are swapped for
    their complements; any other character is passed through unchanged.
    """
    complement_table = str.maketrans("ATCGRYKMVBHD", "TAGCYRMKBVDH")
    reversed_upper = watson.upper()[::-1]
    return reversed_upper.translate(complement_table)
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def sortORFs(tool_ORFs):
    """Return an OrderedDict of *tool_ORFs* ordered by start position.

    Keys are expected in 'start,stop' form; only the integer before the first
    comma is used for ordering.
    """
    def _start_position(entry):
        position_key = entry[0]
        return int(position_key.split(",")[0])

    ordered_pairs = sorted(tool_ORFs.items(), key=_start_position)
    return collections.OrderedDict(ordered_pairs)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
def sortGenes(Genes):
    """Return the values of *Genes* re-keyed 0..n-1 in start order, nudged by stop.

    Values are first ordered by int(value[0]). A single forward pass then swaps
    an entry with its predecessor whenever its value[1] is smaller than the
    value[1] of the entry visited immediately before it.
    """
    by_start = sorted(Genes.values(), key=lambda gene: int(gene[0]))
    Genes_Sorted = collections.OrderedDict(enumerate(by_start))
    last_stop = 0
    # Only values of existing keys are reassigned, so iterating while swapping is safe.
    for index, gene in Genes_Sorted.items():
        current_stop = gene[1]
        if current_stop < last_stop:
            predecessor = Genes_Sorted[index - 1]
            current = Genes_Sorted[index]
            Genes_Sorted[index] = predecessor
            Genes_Sorted[index - 1] = current
        last_stop = current_stop
    return Genes_Sorted
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def gff_load(options, gff_in, dna_regions):
    """Append gene loci read from GFF lines to the matching entries of *dna_regions*.

    Args:
        options: object exposing ``gene_ident``, a sequence of strings. When its
            first element is 'ID=gene' a line is selected if that string occurs
            in column 9; otherwise a line is selected if any element occurs in
            column 3 (lines whose column 3 is exactly 'region' are skipped).
        gff_in: iterable of tab-separated GFF lines.
        dna_regions: mapping of region id -> indexable whose element 2 is a list;
            each selected locus is appended to it as {count: [start, stop, strand]}.

    Returns:
        The same *dna_regions* mapping, updated in place.
    """
    count = 0
    seen_loci = {}  # region id -> set of (start, stop, strand) already stored
    for line in gff_in:  # Get gene loci from GFF - ID=Gene will also classify Pseudogenes as genes
        if line.startswith('\n') or line.startswith('#') or 'European Nucleotide Archive' in line:
            continue  # Not to crash on empty lines in GFF
        line_data = line.split('\t')
        ident_mode = options.gene_ident[0] == 'ID=gene'
        try:
            region = line_data[0]
            if ident_mode:
                if region not in dna_regions or options.gene_ident[0] not in line_data[8]:
                    continue
            else:
                if line_data[2] == 'region' or region not in dna_regions:
                    continue
                if not any(gene_type in line_data[2] for gene_type in options.gene_ident):
                    continue
            start = int(line_data[3])
            stop = int(line_data[4])
            strand = line_data[6]
        except IndexError:
            continue  # truncated line - not enough columns
        gene_details = [start, stop, strand]
        region_genes = dna_regions[region][2]
        if not ident_mode:
            # The stored entries are {count: details} dicts, so duplicates must be
            # detected on the details themselves rather than by list membership.
            if region not in seen_loci:
                seen_loci[region] = {
                    tuple(details)
                    for entry in region_genes if isinstance(entry, dict)
                    for details in entry.values()
                }
            locus = (start, stop, strand)
            if locus in seen_loci[region]:
                continue
            seen_loci[region].add(locus)
        region_genes.append({count: gene_details})  # This will add to list
        count += 1
    return dna_regions
|
|
67
|
+
|
|
68
|
+
|
|
69
|
+
def _lines_after_fasta_marker(handle):
    """Yield the lines that follow a '##FASTA' marker line (marker lines excluded)."""
    at_fasta = False
    for line in handle:
        if line.startswith('##FASTA'):
            at_fasta = True
        elif at_fasta:
            yield line


def _collect_fasta_records(lines, dna_regions):
    """Parse FASTA-formatted *lines* into *dna_regions* and return it."""
    region_id = None
    chunks = []
    for raw_line in lines:
        line = raw_line.strip()
        if line.startswith('>'):
            if region_id is not None:  # flush the record that just ended
                seq = ''.join(chunks)
                dna_regions.update({region_id: (seq, len(seq), list(), None)})
            region_id = line.split()[0].replace('>', '')
            chunks = []
        elif region_id is not None:  # ignore anything before the first header
            chunks.append(line)
    if region_id is not None:  # flush the final record
        seq = ''.join(chunks)
        dna_regions.update({region_id: (seq, len(seq), list(), None)})
    return dna_regions


def fasta_load(fasta_in):
    """Load DNA regions from a FASTA file or from the FASTA section of a GFF file.

    Args:
        fasta_in: seekable text handle. If its first line contains '>' the whole
            file is parsed as FASTA; otherwise only the lines following a
            '##FASTA' marker are parsed (e.g. a Prokka GFF with embedded sequence).

    Returns:
        An OrderedDict mapping each region id (first whitespace-delimited token
        of the header, '>' removed) to a tuple (sequence, length, [], None).
        Empty when no FASTA record is found.
    """
    dna_regions = collections.OrderedDict()
    # Read the first line once: a second readline() would inspect the second line instead.
    first_line = fasta_in.readline().rstrip()
    fasta_in.seek(0)
    if '>' in first_line:
        #### Default for when presented with standard fasta file
        return _collect_fasta_records(fasta_in, dna_regions)
    #### Called when presented with Prokka GFF file so must get fasta from inside it
    return _collect_fasta_records(_lines_after_fasta_marker(fasta_in), dna_regions)
|
|
115
|
+
|
|
116
|
+
|
|
117
|
+
def get_rep_metrics(result):
    """Return the representative metric header and the matching values.

    Args:
        result: mapping whose 'rep_metrics' entry maps each metric name to its value.

    Returns:
        A tuple (description, values): the comma-separated metric names and a
        list of the corresponding values in the same order.
    """
    rep_metric_description = ('Percentage_of_Genes_Detected,Percentage_of_ORFs_that_Detected_a_Gene,'
                              'Percent_Difference_of_All_ORFs,Median_Length_Difference,Percentage_of_Perfect_Matches,'
                              'Median_Start_Difference_of_Matched_ORFs,Median_Stop_Difference_of_Matched_ORFs,'
                              'Percentage_Difference_of_Matched_Overlapping_CDSs,Percent_Difference_of_Short-Matched-ORFs,'
                              'Precision,Recall,False_Discovery_Rate')
    # The header lists the metric names in output order, so it doubles as the key list.
    rep_metrics = []
    for metric_name in rep_metric_description.split(','):
        rep_metrics.append(result['rep_metrics'][metric_name])
    return rep_metric_description, rep_metrics
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def get_all_metrics(result):
    """Return the full metric header and the matching values.

    Args:
        result: mapping whose 'pred_metrics' entry maps each metric name to its value.

    Returns:
        A tuple (description, values): the comma-separated metric names and a
        list of the corresponding values in the same order.
    """
    all_metric_description = ('Number_of_ORFs,Percent_Difference_of_All_ORFs,Number_of_ORFs_that_Detected_a_Gene,'
                              'Percentage_of_ORFs_that_Detected_a_Gene,Number_of_Genes_Detected,Percentage_of_Genes_Detected,'
                              'Median_Length_of_All_ORFs,Median_Length_Difference,Minimum_Length_of_All_ORFs,Minimum_Length_Difference,'
                              'Maximum_Length_of_All_ORFs,Maximum_Length_Difference,Median_GC_content_of_All_ORFs,'
                              'Percent_Difference_of_All_ORFs_Median_GC,Median_GC_content_of_Matched_ORFs,'
                              'Percent_Difference_of_Matched_ORF_GC,Number_of_ORFs_which_Overlap_Another_ORF,'
                              'Percent_Difference_of_Overlapping_ORFs,Maximum_ORF_Overlap,Median_ORF_Overlap,'
                              'Number_of_Matched_ORFs_Overlapping_Another_ORF,Percentage_Difference_of_Matched_Overlapping_CDSs,'
                              'Maximum_Matched_ORF_Overlap,Median_Matched_ORF_Overlap,Number_of_Short-ORFs,Percent_Difference_of_Short-ORFs,'
                              'Number_of_Short-Matched-ORFs,Percent_Difference_of_Short-Matched-ORFs,Number_of_Perfect_Matches,'
                              'Percentage_of_Perfect_Matches,Number_of_Perfect_Starts,Percentage_of_Perfect_Starts,Number_of_Perfect_Stops,'
                              'Percentage_of_Perfect_Stops,Number_of_Out_of_Frame_ORFs,Number_of_Matched_ORFs_Extending_a_Coding_Region,'
                              'Percentage_of_Matched_ORFs_Extending_a_Coding_Region,Number_of_Matched_ORFs_Extending_Start_Region,'
                              'Percentage_of_Matched_ORFs_Extending_Start_Region,Number_of_Matched_ORFs_Extending_Stop_Region,'
                              'Percentage_of_Matched_ORFs_Extending_Stop_Region,Number_of_All_ORFs_on_Positive_Strand,'
                              'Percentage_of_All_ORFs_on_Positive_Strand,Number_of_All_ORFs_on_Negative_Strand,'
                              'Percentage_of_All_ORFs_on_Negative_Strand,Median_Start_Difference_of_Matched_ORFs,'
                              'Median_Stop_Difference_of_Matched_ORFs,ATG_Start_Percentage,GTG_Start_Percentage,TTG_Start_Percentage,'
                              'ATT_Start_Percentage,CTG_Start_Percentage,Other_Start_Codon_Percentage,TAG_Stop_Percentage,'
                              'TAA_Stop_Percentage,TGA_Stop_Percentage,Other_Stop_Codon_Percentage,True_Positive,False_Positive,'
                              'False_Negative,Precision,Recall,False_Discovery_Rate,Nucleotide_True_Positive,Nucleotide_False_Positive,'
                              'Nucleotide_True_Negative,Nucleotide_False_Negative,Nucleotide_Precision,Nucleotide_Recall,'
                              'Nucleotide_False_Discovery_Rate,ORF_Nucleotide_Coverage_of_Genome,Matched_ORF_Nucleotide_Coverage_of_Genome')
    # The header lists the metric names in output order, so it doubles as the key list.
    all_metrics = []
    for metric_name in all_metric_description.split(','):
        all_metrics.append(result['pred_metrics'][metric_name])

    return all_metric_description, all_metrics
|