ORForise 1.4.1__tar.gz → 1.4.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {ORForise-1.4.1 → orforise-1.4.3}/PKG-INFO +8 -6
- {ORForise-1.4.1 → orforise-1.4.3}/README.md +4 -4
- {ORForise-1.4.1 → orforise-1.4.3}/setup.cfg +1 -1
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Annotation_Compare.py +2 -2
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Comparator.py +35 -17
- orforise-1.4.3/src/ORForise/Tools/GFF/GFF.py +62 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/utils.py +1 -1
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/PKG-INFO +8 -6
- ORForise-1.4.1/src/ORForise/Tools/GFF/GFF.py +0 -45
- {ORForise-1.4.1 → orforise-1.4.3}/LICENSE +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/pyproject.toml +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Aggregate_Compare.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/GFF_Adder.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/GFF_Intersector.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/StORForise.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Augustus/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Balrog/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/EasyGene/EasyGene.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/EasyGene/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/FGENESB/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/FragGeneScan/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GFF/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GLIMMER_3/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_HA/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_HMM/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_S/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/GeneMark_S_2/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGene/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Prodigal/Prodigal.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Prodigal/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/Prokka/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Reporter/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/StORF_Undetected.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/__init__.py +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/SOURCES.txt +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/dependency_links.txt +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/entry_points.txt +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/requires.txt +0 -0
- {ORForise-1.4.1 → orforise-1.4.3}/src/ORForise.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -12,6 +12,8 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.6
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Dynamic: license-file
|
|
15
17
|
|
|
16
18
|
# ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
|
|
17
19
|
## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
|
|
@@ -61,7 +63,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
61
63
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
62
64
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
63
65
|
|
|
64
|
-
ORForise v1.4.
|
|
66
|
+
ORForise v1.4.3: Annotation-Compare Run Parameters.
|
|
65
67
|
|
|
66
68
|
Required Arguments:
|
|
67
69
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -111,7 +113,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
111
113
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
112
114
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
113
115
|
|
|
114
|
-
ORForise v1.4.
|
|
116
|
+
ORForise v1.4.3: Aggregate-Compare Run Parameters.
|
|
115
117
|
|
|
116
118
|
Required Arguments:
|
|
117
119
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -265,7 +267,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
265
267
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
266
268
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
267
269
|
|
|
268
|
-
ORForise v1.4.
|
|
270
|
+
ORForise v1.4.3: GFF-Adder Run Parameters.
|
|
269
271
|
|
|
270
272
|
Required Arguments:
|
|
271
273
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -327,7 +329,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
327
329
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
328
330
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
329
331
|
|
|
330
|
-
ORForise v1.4.
|
|
332
|
+
ORForise v1.4.3: GFF-Intersector Run Parameters.
|
|
331
333
|
|
|
332
334
|
Required Arguments:
|
|
333
335
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -46,7 +46,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
46
46
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
47
47
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
48
48
|
|
|
49
|
-
ORForise v1.4.
|
|
49
|
+
ORForise v1.4.3: Annotation-Compare Run Parameters.
|
|
50
50
|
|
|
51
51
|
Required Arguments:
|
|
52
52
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -96,7 +96,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
96
96
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
97
97
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
98
98
|
|
|
99
|
-
ORForise v1.4.
|
|
99
|
+
ORForise v1.4.3: Aggregate-Compare Run Parameters.
|
|
100
100
|
|
|
101
101
|
Required Arguments:
|
|
102
102
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -250,7 +250,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
250
250
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
251
251
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
252
252
|
|
|
253
|
-
ORForise v1.4.
|
|
253
|
+
ORForise v1.4.3: GFF-Adder Run Parameters.
|
|
254
254
|
|
|
255
255
|
Required Arguments:
|
|
256
256
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -312,7 +312,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
312
312
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
313
313
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
314
314
|
|
|
315
|
-
ORForise v1.4.
|
|
315
|
+
ORForise v1.4.3: GFF-Intersector Run Parameters.
|
|
316
316
|
|
|
317
317
|
Required Arguments:
|
|
318
318
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -72,11 +72,11 @@ def comparator(options):
|
|
|
72
72
|
rep_metric_description = list(all_rep_Metrics.keys())
|
|
73
73
|
rep_metrics = list(all_rep_Metrics.values())
|
|
74
74
|
############## Printing to std-out and optional csv file
|
|
75
|
-
print('Genome Used: ' + str(options.
|
|
75
|
+
print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
|
|
76
76
|
if options.reference_tool:
|
|
77
77
|
print('Reference Tool Used: '+str(options.reference_tool))
|
|
78
78
|
else:
|
|
79
|
-
print('Reference Used: ' + str(options.reference_annotation))
|
|
79
|
+
print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
|
|
80
80
|
print('Tool Compared: '+str(options.tool))
|
|
81
81
|
print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
|
|
82
82
|
print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')
|
|
@@ -47,13 +47,30 @@ comp = comparator()
|
|
|
47
47
|
|
|
48
48
|
def is_double_range(range1, range2):
|
|
49
49
|
return len(range1) >= 2 * len(range2)
|
|
50
|
-
def nuc_Count(start, stop, strand): # Gets correct seq then returns GC
|
|
51
|
-
if
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
def nuc_Count(verbose, start, stop, strand): # Gets correct seq then returns GC
|
|
51
|
+
if stop >= comp.genome_Size:
|
|
52
|
+
if verbose == True:
|
|
53
|
+
print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
|
|
54
|
+
extra_stop = stop - comp.genome_Size
|
|
55
|
+
stop = comp.genome_Size
|
|
56
|
+
if strand == '-':
|
|
57
|
+
r_Start = comp.genome_Size - stop
|
|
58
|
+
r_Stop = comp.genome_Size - start
|
|
59
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
60
|
+
extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
|
|
61
|
+
seq = extra_seq+seq
|
|
62
|
+
elif strand == '+':
|
|
63
|
+
seq = comp.genome_Seq[start - 1:stop]
|
|
64
|
+
extra_seq = comp.genome_Seq[:extra_stop +1]
|
|
65
|
+
seq = seq+extra_seq
|
|
66
|
+
#seq = (comp.genome_Seq[start - 1:stop])
|
|
67
|
+
else:
|
|
68
|
+
if strand == '-':
|
|
69
|
+
r_Start = comp.genome_Size - stop
|
|
70
|
+
r_Stop = comp.genome_Size - start
|
|
71
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
72
|
+
elif strand == '+':
|
|
73
|
+
seq = (comp.genome_Seq[start - 1:stop])
|
|
57
74
|
c = 0
|
|
58
75
|
a = 0
|
|
59
76
|
g = 0
|
|
@@ -213,7 +230,8 @@ def candidate_ORF_Selection(gene_Set,
|
|
|
213
230
|
for c_Pos, c_ORF_Details in candidate_ORFs.items():
|
|
214
231
|
o_Start = int(c_Pos.split(',')[0])
|
|
215
232
|
o_Stop = int(c_Pos.split(',')[1])
|
|
216
|
-
|
|
233
|
+
# Below is not a long term fix
|
|
234
|
+
coverage = c_ORF_Details[-1]
|
|
217
235
|
orf_Set = set(range(o_Start, o_Stop + 1))
|
|
218
236
|
if coverage > current_Coverage:
|
|
219
237
|
current_Coverage = coverage
|
|
@@ -323,8 +341,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
323
341
|
comp.genes_Detected.update({str(gene_details): g_pos})
|
|
324
342
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
325
343
|
perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
|
|
326
|
-
if verbose == True:
|
|
327
|
-
|
|
344
|
+
#if verbose == True:
|
|
345
|
+
# print('Perfect Match')
|
|
328
346
|
elif perfect_Match == False and len(
|
|
329
347
|
overlapping_ORFs) == 1: # If we do not have a perfect match but 1 ORF which has passed the filtering
|
|
330
348
|
orf_Pos = list(overlapping_ORFs.keys())[0]
|
|
@@ -344,8 +362,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
344
362
|
comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
|
|
345
363
|
comp.genes_Detected.update({str(gene_details): orf_Pos})
|
|
346
364
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
347
|
-
if verbose == True:
|
|
348
|
-
|
|
365
|
+
#if verbose == True:
|
|
366
|
+
# print('Partial Match')
|
|
349
367
|
partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
|
|
350
368
|
elif perfect_Match == False and len(
|
|
351
369
|
overlapping_ORFs) >= 1: # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
|
|
@@ -374,8 +392,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
374
392
|
genes_Unmatched(g_Start, g_Stop, g_Strand) #
|
|
375
393
|
else:
|
|
376
394
|
genes_Unmatched(g_Start, g_Stop, g_Strand) # No hit
|
|
377
|
-
if verbose == True:
|
|
378
|
-
|
|
395
|
+
#if verbose == True:
|
|
396
|
+
# print("No Hit")
|
|
379
397
|
for orf_Key in comp.matched_ORFs: # Remove ORFs from out of frame if ORF was correctly matched to another Gene
|
|
380
398
|
if orf_Key in comp.out_Of_Frame_ORFs:
|
|
381
399
|
del comp.out_Of_Frame_ORFs[orf_Key]
|
|
@@ -409,7 +427,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
409
427
|
if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
|
|
410
428
|
comp.gene_Lengths.append(gene_Length)
|
|
411
429
|
gene_Nuc_Array[g_Start - 1:g_Stop] = True # Changing all between the two positions to 1's
|
|
412
|
-
comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
|
|
430
|
+
comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
|
|
413
431
|
if gene_Length <= SHORT_ORF_LENGTH: # .utils
|
|
414
432
|
comp.gene_Short.append(gene_Length)
|
|
415
433
|
### Calculate overlapping Genes -
|
|
@@ -453,7 +471,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
453
471
|
orf_Length = (o_Stop - o_Start) +1
|
|
454
472
|
comp.orf_Lengths.append(orf_Length)
|
|
455
473
|
orf_Nuc_Array[o_Start - 1:o_Stop] = True # Changing all between the two positions to 1's
|
|
456
|
-
comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
|
|
474
|
+
comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
|
|
457
475
|
if orf_Length <= SHORT_ORF_LENGTH: # .utils
|
|
458
476
|
comp.orf_Short.append(orf_Length)
|
|
459
477
|
### Calculate overlapping ORFs -
|
|
@@ -487,7 +505,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
487
505
|
mo_Length = (mo_Stop - mo_Start)
|
|
488
506
|
matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True # This is the complete matched orf not the matched orf bits
|
|
489
507
|
|
|
490
|
-
comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
|
|
508
|
+
comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
|
|
491
509
|
if mo_Length <= SHORT_ORF_LENGTH: # .utils
|
|
492
510
|
comp.m_ORF_Short.append(mo_Length)
|
|
493
511
|
### Calculate overlapping Matched ORFs -
|
|
@@ -0,0 +1,62 @@
|
|
|
1
|
+
import collections
|
|
2
|
+
import sys
|
|
3
|
+
try:
|
|
4
|
+
from utils import revCompIterative
|
|
5
|
+
from utils import sortORFs
|
|
6
|
+
except ImportError:
|
|
7
|
+
from ORForise.utils import revCompIterative
|
|
8
|
+
from ORForise.utils import sortORFs
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def GFF(*args):
    """Parse the CDS features of a GFF file into an ordered ORF dictionary.

    The ``*args`` signature is kept for existing callers:
        args[0]: path to the tab-separated GFF prediction file.
        args[1]: genome sequence (forward strand) as a string.

    Returns:
        The result of ``sortORFs`` applied to an ``OrderedDict`` mapping
        ``"start,stop"`` (coordinates exactly as written in the file) to
        ``[strand, startCodon, stopCodon, feature_type, attributes]``.

    Coordinates are treated as 1-based and inclusive. A feature whose stop
    lies beyond the last base of the genome is read as wrapping around the
    origin: the ``stop - genome_size`` overhanging bases are taken from the
    beginning of the genome. Records whose strand column contains neither
    '+' nor '-' are skipped because no codon can be read for them.
    """
    tool_pred = args[0]
    genome = args[1]
    GFF_ORFs = collections.OrderedDict()
    genome_size = len(genome)
    genome_rev = revCompIterative(genome)
    with open(tool_pred, 'r') as gff_input:
        for line in gff_input:
            # Only lines that *start* with '#' are comments/directives; a '#'
            # inside the attributes column must not discard the record.
            if line.startswith('#'):
                continue
            fields = line.split('\t')
            # Check the column count first so blank or embedded FASTA lines
            # (fewer than three fields) cannot raise IndexError on fields[2].
            if len(fields) != 9 or 'CDS' not in fields[2]:
                continue
            start = int(fields[3])
            stop = int(fields[4])
            strand = fields[6]
            info = fields[8]  # last column as read, trailing newline included
            reverse = '-' in strand
            if not reverse and '+' not in strand:
                # Unstranded record: previously this reused the codons of the
                # preceding record (or raised on the first one).
                continue
            if stop > genome_size:
                # Wrap-around gene: exactly `overhang` bases continue past the
                # origin. A gene ending on the last base does not wrap.
                overhang = stop - genome_size
                if reverse:  # Reverse complement: overhang precedes the rest
                    seq = genome_rev[-overhang:] + genome_rev[:genome_size - start + 1]
                else:
                    seq = genome[start - 1:] + genome[:overhang]
                startCodon = seq[:3]
                stopCodon = seq[-3:]
            elif reverse:  # Reverse complement starts and stops adjusted
                r_start = genome_size - stop
                r_stop = genome_size - start
                startCodon = genome_rev[r_start:r_start + 3]
                stopCodon = genome_rev[r_stop - 2:r_stop + 1]
            else:
                startCodon = genome[start - 1:start + 2]
                stopCodon = genome[stop - 3:stop]
            po = str(start) + ',' + str(stop)
            orf = [strand, startCodon, stopCodon, fields[2], info]  # This needs to detect the type
            GFF_ORFs.update({po: orf})

    GFF_ORFs = sortORFs(GFF_ORFs)
    return GFF_ORFs
|
|
@@ -1,6 +1,6 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.3
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -12,6 +12,8 @@ Classifier: Operating System :: OS Independent
|
|
|
12
12
|
Requires-Python: >=3.6
|
|
13
13
|
Description-Content-Type: text/markdown
|
|
14
14
|
License-File: LICENSE
|
|
15
|
+
Requires-Dist: numpy
|
|
16
|
+
Dynamic: license-file
|
|
15
17
|
|
|
16
18
|
# ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
|
|
17
19
|
## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
|
|
@@ -61,7 +63,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
61
63
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
62
64
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
63
65
|
|
|
64
|
-
ORForise v1.4.
|
|
66
|
+
ORForise v1.4.3: Annotation-Compare Run Parameters.
|
|
65
67
|
|
|
66
68
|
Required Arguments:
|
|
67
69
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -111,7 +113,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
111
113
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
112
114
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
113
115
|
|
|
114
|
-
ORForise v1.4.
|
|
116
|
+
ORForise v1.4.3: Aggregate-Compare Run Parameters.
|
|
115
117
|
|
|
116
118
|
Required Arguments:
|
|
117
119
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -265,7 +267,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
265
267
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
266
268
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
267
269
|
|
|
268
|
-
ORForise v1.4.
|
|
270
|
+
ORForise v1.4.3: GFF-Adder Run Parameters.
|
|
269
271
|
|
|
270
272
|
Required Arguments:
|
|
271
273
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -327,7 +329,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
327
329
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
328
330
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
329
331
|
|
|
330
|
-
ORForise v1.4.
|
|
332
|
+
ORForise v1.4.3: GFF-Intersector Run Parameters.
|
|
331
333
|
|
|
332
334
|
Required Arguments:
|
|
333
335
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -1,45 +0,0 @@
|
|
|
1
|
-
import collections
|
|
2
|
-
import sys
|
|
3
|
-
try:
|
|
4
|
-
from utils import revCompIterative
|
|
5
|
-
from utils import sortORFs
|
|
6
|
-
except ImportError:
|
|
7
|
-
from ORForise.utils import revCompIterative
|
|
8
|
-
from ORForise.utils import sortORFs
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def GFF(*args):
|
|
12
|
-
tool_pred = args[0]
|
|
13
|
-
genome = args[1]
|
|
14
|
-
#types = args[2]
|
|
15
|
-
GFF_ORFs = collections.OrderedDict()
|
|
16
|
-
genome_size = len(genome)
|
|
17
|
-
genome_rev = revCompIterative(genome)
|
|
18
|
-
with open(tool_pred, 'r') as gff_input:
|
|
19
|
-
for line in gff_input:
|
|
20
|
-
if '#' not in line:
|
|
21
|
-
line = line.split('\t')
|
|
22
|
-
#gene_types = types.split(',') - Temporary fix
|
|
23
|
-
#if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9: # line[2] for normalrun
|
|
24
|
-
if 'CDS' in line[2] and len(line) == 9:
|
|
25
|
-
start = int(line[3])
|
|
26
|
-
stop = int(line[4])
|
|
27
|
-
strand = line[6]
|
|
28
|
-
info = line[8]
|
|
29
|
-
#name = line[8].split('Name=')[1].split(';')[0] # Issue with multiple records for each gene.
|
|
30
|
-
if '-' in strand: # Reverse Compliment starts and stops adjusted
|
|
31
|
-
r_start = genome_size - stop
|
|
32
|
-
r_stop = genome_size - start
|
|
33
|
-
startCodon = genome_rev[r_start:r_start + 3]
|
|
34
|
-
stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
35
|
-
elif '+' in strand:
|
|
36
|
-
startCodon = genome[start - 1:start + 2]
|
|
37
|
-
stopCodon = genome[stop - 3:stop]
|
|
38
|
-
po = str(start) + ',' + str(stop)
|
|
39
|
-
orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
|
|
40
|
-
GFF_ORFs.update({po: orf})
|
|
41
|
-
# elif "CDS" in line[2]:
|
|
42
|
-
# sys.exit("SAS")
|
|
43
|
-
|
|
44
|
-
GFF_ORFs = sortORFs(GFF_ORFs)
|
|
45
|
-
return GFF_ORFs
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{ORForise-1.4.1 → orforise-1.4.3}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|