ORForise 1.4.0__tar.gz → 1.4.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ORForise-1.4.0 → orforise-1.4.2}/PKG-INFO +7 -6
- {ORForise-1.4.0 → orforise-1.4.2}/README.md +5 -5
- {ORForise-1.4.0 → orforise-1.4.2}/setup.cfg +1 -1
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Annotation_Compare.py +4 -8
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Comparator.py +68 -48
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/GFF_Adder.py +0 -2
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/StORForise.py +10 -10
- orforise-1.4.2/src/ORForise/Tools/GFF/GFF.py +62 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +4 -3
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/utils.py +2 -13
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/PKG-INFO +7 -6
- ORForise-1.4.0/src/ORForise/Tools/GFF/GFF.py +0 -44
- {ORForise-1.4.0 → orforise-1.4.2}/LICENSE +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/pyproject.toml +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Aggregate_Compare.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/GFF_Intersector.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Augustus/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Balrog/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/EasyGene/EasyGene.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/EasyGene/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FGENESB/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FragGeneScan/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GFF/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GLIMMER_3/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HA/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HMM/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S_2/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGene/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prodigal/Prodigal.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prodigal/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prokka/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Reporter/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/StORF_Undetected.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/__init__.py +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/SOURCES.txt +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/dependency_links.txt +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/entry_points.txt +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/requires.txt +0 -0
- {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.6
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy
|
|
15
16
|
|
|
16
17
|
# ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
|
|
17
18
|
## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
|
|
@@ -20,7 +21,7 @@ License-File: LICENSE
|
|
|
20
21
|
|
|
21
22
|
# Requirements and Installation:
|
|
22
23
|
|
|
23
|
-
### The ORForise platform is written in
|
|
24
|
+
### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
|
|
24
25
|
|
|
25
26
|
## Installation:
|
|
26
27
|
|
|
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
61
62
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
62
63
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
63
64
|
|
|
64
|
-
ORForise v1.4.
|
|
65
|
+
ORForise v1.4.2: Annotation-Compare Run Parameters.
|
|
65
66
|
|
|
66
67
|
Required Arguments:
|
|
67
68
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
111
112
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
112
113
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
113
114
|
|
|
114
|
-
ORForise v1.4.
|
|
115
|
+
ORForise v1.4.2: Aggregate-Compare Run Parameters.
|
|
115
116
|
|
|
116
117
|
Required Arguments:
|
|
117
118
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
265
266
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
266
267
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
267
268
|
|
|
268
|
-
ORForise v1.4.
|
|
269
|
+
ORForise v1.4.2: GFF-Adder Run Parameters.
|
|
269
270
|
|
|
270
271
|
Required Arguments:
|
|
271
272
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
327
328
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
328
329
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
329
330
|
|
|
330
|
-
ORForise v1.4.
|
|
331
|
+
ORForise v1.4.2: GFF-Intersector Run Parameters.
|
|
331
332
|
|
|
332
333
|
Required Arguments:
|
|
333
334
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
|
|
6
6
|
# Requirements and Installation:
|
|
7
7
|
|
|
8
|
-
### The ORForise platform is written in
|
|
8
|
+
### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
|
|
9
9
|
|
|
10
10
|
## Installation:
|
|
11
11
|
|
|
@@ -46,7 +46,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
46
46
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
47
47
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
48
48
|
|
|
49
|
-
ORForise v1.4.
|
|
49
|
+
ORForise v1.4.2: Annotation-Compare Run Parameters.
|
|
50
50
|
|
|
51
51
|
Required Arguments:
|
|
52
52
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -96,7 +96,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
96
96
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
97
97
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
98
98
|
|
|
99
|
-
ORForise v1.4.
|
|
99
|
+
ORForise v1.4.2: Aggregate-Compare Run Parameters.
|
|
100
100
|
|
|
101
101
|
Required Arguments:
|
|
102
102
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -250,7 +250,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
250
250
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
251
251
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
252
252
|
|
|
253
|
-
ORForise v1.4.
|
|
253
|
+
ORForise v1.4.2: GFF-Adder Run Parameters.
|
|
254
254
|
|
|
255
255
|
Required Arguments:
|
|
256
256
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -312,7 +312,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
312
312
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
313
313
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
314
314
|
|
|
315
|
-
ORForise v1.4.
|
|
315
|
+
ORForise v1.4.2: GFF-Intersector Run Parameters.
|
|
316
316
|
|
|
317
317
|
Required Arguments:
|
|
318
318
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -16,12 +16,8 @@ except ImportError:
|
|
|
16
16
|
##########################
|
|
17
17
|
|
|
18
18
|
def comparator(options):
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
for line in genome:
|
|
22
|
-
line = line.replace("\n", "")
|
|
23
|
-
if not line.startswith('>'):
|
|
24
|
-
genome_Seq += str(line)
|
|
19
|
+
with open(options.genome_DNA, mode='r') as genome:
|
|
20
|
+
genome_Seq = "".join(line.rstrip() for line in genome if not line.startswith('>'))
|
|
25
21
|
##############################################
|
|
26
22
|
if not options.reference_tool: # IF using Ensembl for comparison
|
|
27
23
|
ref_genes = collections.OrderedDict() # Order is important
|
|
@@ -76,11 +72,11 @@ def comparator(options):
|
|
|
76
72
|
rep_metric_description = list(all_rep_Metrics.keys())
|
|
77
73
|
rep_metrics = list(all_rep_Metrics.values())
|
|
78
74
|
############## Printing to std-out and optional csv file
|
|
79
|
-
print('Genome Used: ' + str(options.
|
|
75
|
+
print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
|
|
80
76
|
if options.reference_tool:
|
|
81
77
|
print('Reference Tool Used: '+str(options.reference_tool))
|
|
82
78
|
else:
|
|
83
|
-
print('Reference Used: ' + str(options.reference_annotation))
|
|
79
|
+
print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
|
|
84
80
|
print('Tool Compared: '+str(options.tool))
|
|
85
81
|
print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
|
|
86
82
|
print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')
|
|
@@ -1,5 +1,4 @@
|
|
|
1
1
|
import numpy as np
|
|
2
|
-
|
|
3
2
|
try:
|
|
4
3
|
from utils import *
|
|
5
4
|
except ImportError:
|
|
@@ -46,14 +45,32 @@ comp = comparator()
|
|
|
46
45
|
# else:
|
|
47
46
|
# print ('Key not found')
|
|
48
47
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
48
|
+
def is_double_range(range1, range2):
|
|
49
|
+
return len(range1) >= 2 * len(range2)
|
|
50
|
+
def nuc_Count(verbose, start, stop, strand): # Gets correct seq then returns GC
|
|
51
|
+
if stop >= comp.genome_Size:
|
|
52
|
+
if verbose == True:
|
|
53
|
+
print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
|
|
54
|
+
extra_stop = stop - comp.genome_Size
|
|
55
|
+
stop = comp.genome_Size
|
|
56
|
+
if strand == '-':
|
|
57
|
+
r_Start = comp.genome_Size - stop
|
|
58
|
+
r_Stop = comp.genome_Size - start
|
|
59
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
60
|
+
extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
|
|
61
|
+
seq = extra_seq+seq
|
|
62
|
+
elif strand == '+':
|
|
63
|
+
seq = comp.genome_Seq[start - 1:stop]
|
|
64
|
+
extra_seq = comp.genome_Seq[:extra_stop +1]
|
|
65
|
+
seq = seq+extra_seq
|
|
66
|
+
#seq = (comp.genome_Seq[start - 1:stop])
|
|
67
|
+
else:
|
|
68
|
+
if strand == '-':
|
|
69
|
+
r_Start = comp.genome_Size - stop
|
|
70
|
+
r_Stop = comp.genome_Size - start
|
|
71
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
72
|
+
elif strand == '+':
|
|
73
|
+
seq = (comp.genome_Seq[start - 1:stop])
|
|
57
74
|
c = 0
|
|
58
75
|
a = 0
|
|
59
76
|
g = 0
|
|
@@ -263,6 +280,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
263
280
|
comp.genome_Seq = genome
|
|
264
281
|
comp.genome_Seq_Rev = revCompIterative(genome)
|
|
265
282
|
comp.genome_Size = len(genome)
|
|
283
|
+
|
|
284
|
+
better_pos_orfs_items = [[(int(pos.split(',')[0]), int(pos.split(',')[1])), orf_Details] for pos, orf_Details in orfs.items()] #TODO: turn pos into tuple instead of string everywhere
|
|
285
|
+
|
|
266
286
|
for gene_num, gene_details in ref_genes.items(): # Loop through each gene to compare against predicted ORFs
|
|
267
287
|
g_Start = int(gene_details[0])
|
|
268
288
|
g_Stop = int(gene_details[1])
|
|
@@ -273,9 +293,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
273
293
|
overlapping_ORFs = collections.OrderedDict()
|
|
274
294
|
perfect_Match = False
|
|
275
295
|
out_Frame = False
|
|
276
|
-
for pos, orf_Details in
|
|
277
|
-
o_Start =
|
|
278
|
-
o_Stop = int(pos.split(',')[1])
|
|
296
|
+
for pos, orf_Details in better_pos_orfs_items: # Check if perfect match, if not check if match covers at least 75% of gene - Loop through ALL ORFs - SLOW
|
|
297
|
+
o_Start,o_Stop = pos
|
|
279
298
|
o_Strand = orf_Details[0]
|
|
280
299
|
#orf_Set = set(range(o_Start, o_Stop + 1)) Removed for optimisation
|
|
281
300
|
if o_Stop <= g_Start or o_Start >= g_Stop: # Not caught up yet
|
|
@@ -283,15 +302,17 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
283
302
|
elif o_Start == g_Start and o_Stop == g_Stop: # If perfect match, break and skip the rest of the ORFs
|
|
284
303
|
perfect_Match = True
|
|
285
304
|
break
|
|
305
|
+
elif is_double_range(range(o_Start, o_Stop), range(g_Start,g_Stop)): # If ORF is double or more than the length of the gene, we do not count as found.
|
|
306
|
+
continue
|
|
286
307
|
elif g_Start <= o_Start < g_Stop or g_Start < o_Stop < g_Stop: # If ORF Start or Stop is between gene Start or Stop
|
|
287
308
|
#overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
|
|
288
309
|
overlap = max(min(o_Stop, g_Stop) - max(o_Start, g_Start), -1) + 1
|
|
289
310
|
coverage = 100 * float(overlap) / float(len(gene_Set))
|
|
290
311
|
orf_Details.append(coverage)
|
|
291
312
|
if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE: # Only continue if ORF covers at least 75% of the gene and is in frame
|
|
292
|
-
overlapping_ORFs.update({
|
|
313
|
+
overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
|
|
293
314
|
elif coverage >= MIN_COVERAGE: # Not in frame / on same strand
|
|
294
|
-
comp.out_Of_Frame_ORFs.update({
|
|
315
|
+
comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
|
|
295
316
|
out_Frame = True
|
|
296
317
|
elif o_Start <= g_Start and o_Stop >= g_Stop: # If ORF extends one or both ends of the gene
|
|
297
318
|
#overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
|
|
@@ -299,9 +320,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
299
320
|
coverage = 100 * float(overlap) / float(len(gene_Set))
|
|
300
321
|
orf_Details.append(coverage)
|
|
301
322
|
if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE: # Only continue if ORF covers at least 75% of the gene and is in frame
|
|
302
|
-
overlapping_ORFs.update({
|
|
323
|
+
overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
|
|
303
324
|
elif coverage >= MIN_COVERAGE:
|
|
304
|
-
comp.out_Of_Frame_ORFs.update({
|
|
325
|
+
comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
|
|
305
326
|
out_Frame = True
|
|
306
327
|
else:
|
|
307
328
|
if verbose == True:
|
|
@@ -319,8 +340,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
319
340
|
comp.genes_Detected.update({str(gene_details): g_pos})
|
|
320
341
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
321
342
|
perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
|
|
322
|
-
if verbose == True:
|
|
323
|
-
|
|
343
|
+
#if verbose == True:
|
|
344
|
+
# print('Perfect Match')
|
|
324
345
|
elif perfect_Match == False and len(
|
|
325
346
|
overlapping_ORFs) == 1: # If we do not have a perfect match but 1 ORF which has passed the filtering
|
|
326
347
|
orf_Pos = list(overlapping_ORFs.keys())[0]
|
|
@@ -340,8 +361,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
340
361
|
comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
|
|
341
362
|
comp.genes_Detected.update({str(gene_details): orf_Pos})
|
|
342
363
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
343
|
-
if verbose == True:
|
|
344
|
-
|
|
364
|
+
#if verbose == True:
|
|
365
|
+
# print('Partial Match')
|
|
345
366
|
partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
|
|
346
367
|
elif perfect_Match == False and len(
|
|
347
368
|
overlapping_ORFs) >= 1: # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
|
|
@@ -370,8 +391,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
370
391
|
genes_Unmatched(g_Start, g_Stop, g_Strand) #
|
|
371
392
|
else:
|
|
372
393
|
genes_Unmatched(g_Start, g_Stop, g_Strand) # No hit
|
|
373
|
-
if verbose == True:
|
|
374
|
-
|
|
394
|
+
#if verbose == True:
|
|
395
|
+
# print("No Hit")
|
|
375
396
|
for orf_Key in comp.matched_ORFs: # Remove ORFs from out of frame if ORF was correctly matched to another Gene
|
|
376
397
|
if orf_Key in comp.out_Of_Frame_ORFs:
|
|
377
398
|
del comp.out_Of_Frame_ORFs[orf_Key]
|
|
@@ -391,9 +412,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
391
412
|
atg_P, gtg_P, ttg_P, att_P, ctg_P, other_Start_P, other_Starts = start_Codon_Count(orfs)
|
|
392
413
|
tag_P, taa_P, tga_P, other_Stop_P, other_Stops = stop_Codon_Count(orfs)
|
|
393
414
|
# Count nucleotides found from ALL ORFs
|
|
394
|
-
gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.
|
|
395
|
-
orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.
|
|
396
|
-
matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.
|
|
415
|
+
gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
|
|
416
|
+
orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
|
|
417
|
+
matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
|
|
397
418
|
|
|
398
419
|
prev_Gene_Stop = 0
|
|
399
420
|
prev_Gene_Overlapped = False
|
|
@@ -401,10 +422,11 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
401
422
|
g_Start = int(gene_details[0])
|
|
402
423
|
g_Stop = int(gene_details[1])
|
|
403
424
|
g_Strand = gene_details[2]
|
|
404
|
-
gene_Length = (g_Stop - g_Start)
|
|
425
|
+
gene_Length = (g_Stop - g_Start) +1
|
|
426
|
+
if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
|
|
405
427
|
comp.gene_Lengths.append(gene_Length)
|
|
406
|
-
gene_Nuc_Array[g_Start - 1:g_Stop] =
|
|
407
|
-
comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
|
|
428
|
+
gene_Nuc_Array[g_Start - 1:g_Stop] = True # Changing all between the two positions to 1's
|
|
429
|
+
comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
|
|
408
430
|
if gene_Length <= SHORT_ORF_LENGTH: # .utils
|
|
409
431
|
comp.gene_Short.append(gene_Length)
|
|
410
432
|
### Calculate overlapping Genes -
|
|
@@ -445,10 +467,10 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
445
467
|
comp.pos_Strand += 1
|
|
446
468
|
elif o_Strand == "-":
|
|
447
469
|
comp.neg_Strand += 1
|
|
448
|
-
orf_Length = (o_Stop - o_Start)
|
|
470
|
+
orf_Length = (o_Stop - o_Start) +1
|
|
449
471
|
comp.orf_Lengths.append(orf_Length)
|
|
450
|
-
orf_Nuc_Array[o_Start - 1:o_Stop] =
|
|
451
|
-
comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
|
|
472
|
+
orf_Nuc_Array[o_Start - 1:o_Stop] = True # Changing all between the two positions to 1's
|
|
473
|
+
comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
|
|
452
474
|
if orf_Length <= SHORT_ORF_LENGTH: # .utils
|
|
453
475
|
comp.orf_Short.append(orf_Length)
|
|
454
476
|
### Calculate overlapping ORFs -
|
|
@@ -480,9 +502,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
480
502
|
mo_Stop = int(mo_Positions.split(',')[1])
|
|
481
503
|
mo_Strand = m_ORF_Details[0]
|
|
482
504
|
mo_Length = (mo_Stop - mo_Start)
|
|
483
|
-
matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] =
|
|
505
|
+
matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True # This is the complete matched orf not the matched orf bits
|
|
484
506
|
|
|
485
|
-
comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
|
|
507
|
+
comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
|
|
486
508
|
if mo_Length <= SHORT_ORF_LENGTH: # .utils
|
|
487
509
|
comp.m_ORF_Short.append(mo_Length)
|
|
488
510
|
### Calculate overlapping Matched ORFs -
|
|
@@ -506,30 +528,28 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
506
528
|
elif '-' in mo_Strand:
|
|
507
529
|
comp.m_ORF_Neg_Olap.append(0)
|
|
508
530
|
####
|
|
509
|
-
gene_Coverage_Genome = format(100 * np.
|
|
510
|
-
orf_Coverage_Genome = format(100 * np.
|
|
511
|
-
matched_ORF_Coverage_Genome = format(100 * np.
|
|
531
|
+
gene_Coverage_Genome = format(100 * np.sum(gene_Nuc_Array) / comp.genome_Size, '.2f')
|
|
532
|
+
orf_Coverage_Genome = format(100 * np.sum(orf_Nuc_Array) / comp.genome_Size, '.2f')
|
|
533
|
+
matched_ORF_Coverage_Genome = format(100 * np.sum(matched_ORF_Nuc_Array) / comp.genome_Size,
|
|
512
534
|
'.2f') # This gets the nts which are in matched ORFs - Check below
|
|
513
535
|
# matched_ORF_Nuc_AND_Gene = np.logical_and(matched_ORF_Nuc_Array,gene_Nuc_Array) + [0 for i in range(len(gene_Nuc_Array))] # This gets the nts which are in both matched ORFs and detected genes
|
|
514
536
|
# matched_ORF_Coverage_Genome = format(100 * np.count_nonzero(matched_ORF_Nuc_AND_Gene) / comp.genome_Size,'.2f')
|
|
515
537
|
|
|
516
538
|
# gene and orf nucleotide Intersection
|
|
517
|
-
gene_ORF_Nuc_Intersection = np.
|
|
539
|
+
gene_ORF_Nuc_Intersection = np.sum(gene_Nuc_Array & orf_Nuc_Array)
|
|
518
540
|
# not gene but orf nucleotides
|
|
519
|
-
not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array)
|
|
520
|
-
|
|
521
|
-
not_Gene_Nuc_And_ORF_Count = np.count_nonzero(not_Gene_Nuc_Array & orf_Nuc_Array)
|
|
541
|
+
not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array)
|
|
542
|
+
not_Gene_Nuc_And_ORF_Count = np.sum(not_Gene_Nuc_Array & orf_Nuc_Array)
|
|
522
543
|
# not orf nucleotides but gene
|
|
523
|
-
not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array)
|
|
524
|
-
|
|
525
|
-
not_ORF_Nuc_And_Gene_Count = np.count_nonzero(not_ORF_Nuc_Array & gene_Nuc_Array)
|
|
544
|
+
not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array)
|
|
545
|
+
not_ORF_Nuc_And_Gene_Count = np.sum(not_ORF_Nuc_Array & gene_Nuc_Array)
|
|
526
546
|
# not gene or orf nucleotides
|
|
527
|
-
not_Gene_Nuc_Not_ORF_Nuc_Count = np.
|
|
547
|
+
not_Gene_Nuc_Not_ORF_Nuc_Count = np.sum(not_Gene_Nuc_Array & not_ORF_Nuc_Array)
|
|
528
548
|
# Nucleotide 'accuracy' - Normalised by number of nucelotides annotated by a gene
|
|
529
|
-
NT_TP = format(gene_ORF_Nuc_Intersection / np.
|
|
530
|
-
NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.
|
|
531
|
-
NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.
|
|
532
|
-
NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.
|
|
549
|
+
NT_TP = format(gene_ORF_Nuc_Intersection / np.sum(gene_Nuc_Array), '.2f')
|
|
550
|
+
NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.sum(not_Gene_Nuc_Array), '.2f')
|
|
551
|
+
NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.sum(gene_Nuc_Array), '.2f')
|
|
552
|
+
NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.sum(not_Gene_Nuc_Array), '.2f')
|
|
533
553
|
NT_Precision = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_Gene_Nuc_And_ORF_Count), '.2f')
|
|
534
554
|
NT_Recall = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_ORF_Nuc_And_Gene_Count), '.2f')
|
|
535
555
|
NT_False_Discovery_Rate = format(
|
|
@@ -11,7 +11,7 @@ from Comparator import tool_comparison
|
|
|
11
11
|
|
|
12
12
|
def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare):
|
|
13
13
|
genome_Seq = ""
|
|
14
|
-
with open(
|
|
14
|
+
with open(genome_to_compare, 'r') as genome:
|
|
15
15
|
for line in genome:
|
|
16
16
|
line = line.replace("\n", "")
|
|
17
17
|
if ">" not in line:
|
|
@@ -19,23 +19,23 @@ def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare
|
|
|
19
19
|
##############################################
|
|
20
20
|
genes = collections.OrderedDict()
|
|
21
21
|
count = 0
|
|
22
|
-
with open(
|
|
22
|
+
with open(input_to_analyse, 'r') as genome_gff: # Get list of missed genes
|
|
23
23
|
for line in genome_gff:
|
|
24
24
|
if ">" in line:
|
|
25
25
|
line = line.strip()
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
genes.update({count:
|
|
26
|
+
start = int(line.split('_')[1])
|
|
27
|
+
stop = int(line.split('_')[2])
|
|
28
|
+
strand = line.split('_')[3]
|
|
29
|
+
gene_details = [start,stop,strand]
|
|
30
|
+
genes.update({count: gene_details})
|
|
31
31
|
count += 1
|
|
32
32
|
##################################
|
|
33
33
|
tool_predictions = import_module('Tools.' + tool + '.' + tool)
|
|
34
34
|
tool_predictions = getattr(tool_predictions, tool)
|
|
35
35
|
orfs = tool_predictions(storfs_to_find_missing, genome_Seq)
|
|
36
|
-
all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
|
|
37
|
-
genes, orfs, genome_Seq)
|
|
38
|
-
outname = tool + '_' + genome_to_compare
|
|
36
|
+
all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, perfect_Matches, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, orf_Coverage_Genome, matched_ORF_Coverage_Genome, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
|
|
37
|
+
genes, orfs, genome_Seq,True)
|
|
38
|
+
outname = tool + '_' + genome_to_compare.split('/')[-1].split('.')[0]
|
|
39
39
|
metric_description = list(all_Metrics.keys())
|
|
40
40
|
metrics = list(all_Metrics.values())
|
|
41
41
|
rep_metric_description = list(all_rep_Metrics.keys())
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import sys
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def GFF(*args):
|
|
12
|
+
tool_pred = args[0]
|
|
13
|
+
genome = args[1]
|
|
14
|
+
#types = args[2]
|
|
15
|
+
GFF_ORFs = collections.OrderedDict()
|
|
16
|
+
genome_size = len(genome)
|
|
17
|
+
genome_rev = revCompIterative(genome)
|
|
18
|
+
with open(tool_pred, 'r') as gff_input:
|
|
19
|
+
for line in gff_input:
|
|
20
|
+
if '#' not in line:
|
|
21
|
+
line = line.split('\t')
|
|
22
|
+
#gene_types = types.split(',') - Temporary fix
|
|
23
|
+
#if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9: # line[2] for normalrun
|
|
24
|
+
if 'CDS' in line[2] and len(line) == 9:
|
|
25
|
+
start = int(line[3])
|
|
26
|
+
stop = int(line[4])
|
|
27
|
+
strand = line[6]
|
|
28
|
+
info = line[8]
|
|
29
|
+
if stop >= genome_size:
|
|
30
|
+
extra_stop = stop - genome_size
|
|
31
|
+
corrected_stop = genome_size
|
|
32
|
+
if '-' in strand: # Reverse Compliment starts and stops adjusted
|
|
33
|
+
r_start = genome_size - corrected_stop
|
|
34
|
+
r_stop = genome_size - start
|
|
35
|
+
seq = genome_rev[r_start:r_stop + 1]
|
|
36
|
+
extra_seq = genome_rev[-extra_stop - 1:]
|
|
37
|
+
seq = extra_seq+seq
|
|
38
|
+
startCodon = seq[:3]
|
|
39
|
+
stopCodon = seq[-3:]
|
|
40
|
+
elif '+' in strand:
|
|
41
|
+
seq = genome[start -1 :corrected_stop]
|
|
42
|
+
extra_seq = genome[:extra_stop +1]
|
|
43
|
+
seq = seq+extra_seq
|
|
44
|
+
startCodon = seq[:3]
|
|
45
|
+
stopCodon = seq[-3:]
|
|
46
|
+
else:
|
|
47
|
+
if '-' in strand: # Reverse Compliment starts and stops adjusted
|
|
48
|
+
r_start = genome_size - stop
|
|
49
|
+
r_stop = genome_size - start
|
|
50
|
+
startCodon = genome_rev[r_start:r_start + 3]
|
|
51
|
+
stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
52
|
+
elif '+' in strand:
|
|
53
|
+
startCodon = genome[start - 1:start + 2]
|
|
54
|
+
stopCodon = genome[stop - 3:stop]
|
|
55
|
+
po = str(start) + ',' + str(stop)
|
|
56
|
+
orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
|
|
57
|
+
GFF_ORFs.update({po: orf})
|
|
58
|
+
# elif "CDS" in line[2]:
|
|
59
|
+
# sys.exit("SAS")
|
|
60
|
+
|
|
61
|
+
GFF_ORFs = sortORFs(GFF_ORFs)
|
|
62
|
+
return GFF_ORFs
|
|
@@ -14,12 +14,13 @@ def StORF_Reporter(tool_pred, genome):
|
|
|
14
14
|
genome_rev = revCompIterative(genome)
|
|
15
15
|
with open(tool_pred, 'r') as storf_input:
|
|
16
16
|
for line in storf_input:
|
|
17
|
-
if '#' not in line:
|
|
17
|
+
if not line.startswith('#') and not line.startswith('\n'):
|
|
18
18
|
line = line.split()
|
|
19
|
-
if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1]: # need to harmonise this.
|
|
19
|
+
if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1] or 'StORF' in line[1] or 'StORF-Reporter' in line[1]: # need to harmonise this.
|
|
20
20
|
start = int(line[3])
|
|
21
21
|
stop = int(line[4])
|
|
22
22
|
strand = line[6]
|
|
23
|
+
info = line[8]
|
|
23
24
|
if '-' in strand: # Reverse Compliment starts and stops adjusted
|
|
24
25
|
r_start = genome_size - stop
|
|
25
26
|
r_stop = genome_size - start
|
|
@@ -29,7 +30,7 @@ def StORF_Reporter(tool_pred, genome):
|
|
|
29
30
|
startCodon = genome[start:start + 3]
|
|
30
31
|
stopCodon = genome[stop - 3:stop]
|
|
31
32
|
po = str(start) + ',' + str(stop)
|
|
32
|
-
orf = [strand, startCodon, stopCodon,
|
|
33
|
+
orf = [strand, startCodon, stopCodon, 'CDS', info] # StORF/Con-StORF or CDS??
|
|
33
34
|
storf_orfs.update({po: orf})
|
|
34
35
|
|
|
35
36
|
storf_orfs = sortORFs(storf_orfs)
|
|
@@ -4,22 +4,11 @@ import collections
|
|
|
4
4
|
# Constants
|
|
5
5
|
SHORT_ORF_LENGTH = 300
|
|
6
6
|
MIN_COVERAGE = 75
|
|
7
|
-
ORForise_Version = 'v1.4.0'
|
|
7
|
+
ORForise_Version = 'v1.4.2'
|
|
8
8
|
|
|
9
9
|
|
|
10
10
|
def revCompIterative(watson): # Gets Reverse Complement
|
|
11
|
-
|
|
12
|
-
'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M',
|
|
13
|
-
'M': 'K', 'V': 'B', 'B': 'V', 'H': 'D', 'D': 'H'}
|
|
14
|
-
watson = watson.upper()
|
|
15
|
-
watsonrev = watson[::-1]
|
|
16
|
-
crick = ""
|
|
17
|
-
for nt in watsonrev:
|
|
18
|
-
try:
|
|
19
|
-
crick += complements[nt]
|
|
20
|
-
except KeyError:
|
|
21
|
-
crick += nt # Do not modify non-standard DNA
|
|
22
|
-
return crick
|
|
11
|
+
return watson.upper()[::-1].translate(str.maketrans("ATCGRYKMVBHD","TAGCYRMKBVDH"))
|
|
23
12
|
|
|
24
13
|
|
|
25
14
|
def sortORFs(tool_ORFs): # Will only sort by given start position
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.0
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.6
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy
|
|
15
16
|
|
|
16
17
|
# ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
|
|
17
18
|
## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
|
|
@@ -20,7 +21,7 @@ License-File: LICENSE
|
|
|
20
21
|
|
|
21
22
|
# Requirements and Installation:
|
|
22
23
|
|
|
23
|
-
### The ORForise platform is written in
|
|
24
|
+
### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
|
|
24
25
|
|
|
25
26
|
## Intallation:
|
|
26
27
|
|
|
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
61
62
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
62
63
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
63
64
|
|
|
64
|
-
ORForise v1.4.
|
|
65
|
+
ORForise v1.4.2: Annotatione-Compare Run Parameters.
|
|
65
66
|
|
|
66
67
|
Required Arguments:
|
|
67
68
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
111
112
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
112
113
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
113
114
|
|
|
114
|
-
ORForise v1.4.
|
|
115
|
+
ORForise v1.4.2: Aggregate-Compare Run Parameters.
|
|
115
116
|
|
|
116
117
|
Required Arguments:
|
|
117
118
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
265
266
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
266
267
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
267
268
|
|
|
268
|
-
ORForise v1.4.
|
|
269
|
+
ORForise v1.4.2: GFF-Adder Run Parameters.
|
|
269
270
|
|
|
270
271
|
Required Arguments:
|
|
271
272
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
327
328
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
328
329
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
329
330
|
|
|
330
|
-
ORForise v1.4.
|
|
331
|
+
ORForise v1.4.2: GFF-Intersector Run Parameters.
|
|
331
332
|
|
|
332
333
|
Required Arguments:
|
|
333
334
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import collections
|
|
2
|
-
import sys
|
|
3
|
-
try:
|
|
4
|
-
from utils import revCompIterative
|
|
5
|
-
from utils import sortORFs
|
|
6
|
-
except ImportError:
|
|
7
|
-
from ORForise.utils import revCompIterative
|
|
8
|
-
from ORForise.utils import sortORFs
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def GFF(*args):
|
|
12
|
-
tool_pred = args[0]
|
|
13
|
-
genome = args[1]
|
|
14
|
-
types = args[2]
|
|
15
|
-
GFF_ORFs = collections.OrderedDict()
|
|
16
|
-
genome_size = len(genome)
|
|
17
|
-
genome_rev = revCompIterative(genome)
|
|
18
|
-
with open(tool_pred, 'r') as gff_input:
|
|
19
|
-
for line in gff_input:
|
|
20
|
-
if '#' not in line:
|
|
21
|
-
line = line.split('\t')
|
|
22
|
-
gene_types = types.split(',')
|
|
23
|
-
if any(gene_type == line[2] for gene_type in gene_types)and len(line) == 9: # line[2] for normalrun
|
|
24
|
-
start = int(line[3])
|
|
25
|
-
stop = int(line[4])
|
|
26
|
-
strand = line[6]
|
|
27
|
-
info = line[8]
|
|
28
|
-
#name = line[8].split('Name=')[1].split(';')[0] # Issue with multiple records for each gene.
|
|
29
|
-
if '-' in strand: # Reverse Compliment starts and stops adjusted
|
|
30
|
-
r_start = genome_size - stop
|
|
31
|
-
r_stop = genome_size - start
|
|
32
|
-
startCodon = genome_rev[r_start:r_start + 3]
|
|
33
|
-
stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
34
|
-
elif '+' in strand:
|
|
35
|
-
startCodon = genome[start - 1:start + 2]
|
|
36
|
-
stopCodon = genome[stop - 3:stop]
|
|
37
|
-
po = str(start) + ',' + str(stop)
|
|
38
|
-
orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
|
|
39
|
-
GFF_ORFs.update({po: orf})
|
|
40
|
-
# elif "CDS" in line[2]:
|
|
41
|
-
# sys.exit("SAS")
|
|
42
|
-
|
|
43
|
-
GFF_ORFs = sortORFs(GFF_ORFs)
|
|
44
|
-
return GFF_ORFs
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|