ORForise 1.5.0__tar.gz → 1.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orforise-1.5.0 → orforise-1.5.1}/PKG-INFO +7 -31
- {orforise-1.5.0 → orforise-1.5.1}/README.md +6 -30
- {orforise-1.5.0 → orforise-1.5.1}/setup.cfg +1 -1
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Annotation_Compare.py +104 -53
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Comparator.py +60 -28
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/utils.py +1 -1
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/PKG-INFO +7 -31
- {orforise-1.5.0 → orforise-1.5.1}/LICENSE +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/pyproject.toml +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Aggregate_Compare.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/GFF_Adder.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/GFF_Intersector.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/StORForise.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Augustus/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Balrog/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/EasyGene/EasyGene.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/EasyGene/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/FGENESB/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/FragGeneScan/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GFF/GFF.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GFF/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GLIMMER_3/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HA/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HMM/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S_2/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGene/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Prodigal/Prodigal.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Prodigal/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/Prokka/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Reporter/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/StORF_Undetected.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise/__init__.py +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/SOURCES.txt +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/dependency_links.txt +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/entry_points.txt +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/requires.txt +0 -0
- {orforise-1.5.0 → orforise-1.5.1}/src/ORForise.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.5.1
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -57,13 +57,7 @@ Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-I
|
|
|
57
57
|
For Help: ```Annotation-Compare -h ```
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
|
|
61
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
62
|
-
#####
|
|
63
|
-
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
64
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
65
|
-
|
|
66
|
-
ORForise v1.5.0: Annotatione-Compare Run Parameters.
|
|
60
|
+
ORForise v1.5.1: Annotatione-Compare Run Parameters.
|
|
67
61
|
|
|
68
62
|
Required Arguments:
|
|
69
63
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -78,8 +72,8 @@ Optional Arguments:
|
|
|
78
72
|
name to compare output from two tools
|
|
79
73
|
|
|
80
74
|
Output:
|
|
81
|
-
-o
|
|
82
|
-
|
|
75
|
+
-o OUTDIR Define directory where detailed output should be places
|
|
76
|
+
-n OUTNAME Define output filename(s) prefix - If not provided, filename of reference annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv
|
|
83
77
|
|
|
84
78
|
Misc:
|
|
85
79
|
-v {True,False} Default - False: Print out runtime status
|
|
@@ -107,13 +101,7 @@ ORForise can be used as the example below.
|
|
|
107
101
|
For Help: ```Aggregate-Compare -h ```
|
|
108
102
|
|
|
109
103
|
```python
|
|
110
|
-
|
|
111
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
112
|
-
#####
|
|
113
|
-
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
114
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
115
|
-
|
|
116
|
-
ORForise v1.5.0: Aggregate-Compare Run Parameters.
|
|
104
|
+
ORForise v1.5.1: Aggregate-Compare Run Parameters.
|
|
117
105
|
|
|
118
106
|
Required Arguments:
|
|
119
107
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -261,13 +249,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
261
249
|
For Help: ```GFF-Adder -h ```
|
|
262
250
|
|
|
263
251
|
```python
|
|
264
|
-
|
|
265
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
266
|
-
#####
|
|
267
|
-
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
268
|
-
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
269
|
-
|
|
270
|
-
ORForise v1.5.0: GFF-Adder Run Parameters.
|
|
252
|
+
ORForise v1.5.1: GFF-Adder Run Parameters.
|
|
271
253
|
|
|
272
254
|
Required Arguments:
|
|
273
255
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -323,13 +305,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
323
305
|
|
|
324
306
|
For Help: ```GFF-Intersector -h ```
|
|
325
307
|
```python
|
|
326
|
-
|
|
327
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
328
|
-
#####
|
|
329
|
-
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
330
|
-
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
331
|
-
|
|
332
|
-
ORForise v1.5.0: GFF-Intersector Run Parameters.
|
|
308
|
+
ORForise v1.5.1: GFF-Intersector Run Parameters.
|
|
333
309
|
|
|
334
310
|
Required Arguments:
|
|
335
311
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -40,13 +40,7 @@ Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-I
|
|
|
40
40
|
For Help: ```Annotation-Compare -h ```
|
|
41
41
|
|
|
42
42
|
```python
|
|
43
|
-
|
|
44
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
45
|
-
#####
|
|
46
|
-
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
47
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
48
|
-
|
|
49
|
-
ORForise v1.5.0: Annotatione-Compare Run Parameters.
|
|
43
|
+
ORForise v1.5.1: Annotatione-Compare Run Parameters.
|
|
50
44
|
|
|
51
45
|
Required Arguments:
|
|
52
46
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -61,8 +55,8 @@ Optional Arguments:
|
|
|
61
55
|
name to compare output from two tools
|
|
62
56
|
|
|
63
57
|
Output:
|
|
64
|
-
-o
|
|
65
|
-
|
|
58
|
+
-o OUTDIR Define directory where detailed output should be places
|
|
59
|
+
-n OUTNAME Define output filename(s) prefix - If not provided, filename of reference annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv
|
|
66
60
|
|
|
67
61
|
Misc:
|
|
68
62
|
-v {True,False} Default - False: Print out runtime status
|
|
@@ -90,13 +84,7 @@ ORForise can be used as the example below.
|
|
|
90
84
|
For Help: ```Aggregate-Compare -h ```
|
|
91
85
|
|
|
92
86
|
```python
|
|
93
|
-
|
|
94
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
95
|
-
#####
|
|
96
|
-
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
97
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
98
|
-
|
|
99
|
-
ORForise v1.5.0: Aggregate-Compare Run Parameters.
|
|
87
|
+
ORForise v1.5.1: Aggregate-Compare Run Parameters.
|
|
100
88
|
|
|
101
89
|
Required Arguments:
|
|
102
90
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -244,13 +232,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
244
232
|
For Help: ```GFF-Adder -h ```
|
|
245
233
|
|
|
246
234
|
```python
|
|
247
|
-
|
|
248
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
249
|
-
#####
|
|
250
|
-
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
251
|
-
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
252
|
-
|
|
253
|
-
ORForise v1.5.0: GFF-Adder Run Parameters.
|
|
235
|
+
ORForise v1.5.1: GFF-Adder Run Parameters.
|
|
254
236
|
|
|
255
237
|
Required Arguments:
|
|
256
238
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -306,13 +288,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
306
288
|
|
|
307
289
|
For Help: ```GFF-Intersector -h ```
|
|
308
290
|
```python
|
|
309
|
-
|
|
310
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
311
|
-
#####
|
|
312
|
-
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
313
|
-
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
314
|
-
|
|
315
|
-
ORForise v1.5.0: GFF-Intersector Run Parameters.
|
|
291
|
+
ORForise v1.5.1: GFF-Intersector Run Parameters.
|
|
316
292
|
|
|
317
293
|
Required Arguments:
|
|
318
294
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
from importlib import import_module
|
|
2
2
|
import argparse
|
|
3
|
-
import sys,os
|
|
4
|
-
import gzip,csv
|
|
3
|
+
import sys, os
|
|
4
|
+
import gzip, csv
|
|
5
|
+
import logging
|
|
6
|
+
from datetime import datetime
|
|
7
|
+
|
|
5
8
|
|
|
6
9
|
try:
|
|
7
10
|
from Comparator import tool_comparison
|
|
@@ -13,10 +16,21 @@ try:
|
|
|
13
16
|
except ImportError:
|
|
14
17
|
from ORForise.utils import *
|
|
15
18
|
|
|
19
|
+
|
|
20
|
+
##########################
|
|
21
|
+
|
|
22
|
+
# Consolidate printing and logging into a single block
|
|
23
|
+
def _pct(n, total):
|
|
24
|
+
try:
|
|
25
|
+
return format(100 * n / total, '.2f') + '%'
|
|
26
|
+
except Exception:
|
|
27
|
+
return 'N/A'
|
|
28
|
+
|
|
16
29
|
##########################
|
|
17
30
|
|
|
18
31
|
def comparator(options):
|
|
19
32
|
|
|
33
|
+
|
|
20
34
|
try:
|
|
21
35
|
try: # Detect whether fasta/gff files are .gz or text and read accordingly
|
|
22
36
|
fasta_in = gzip.open(options.genome_dna, 'rt')
|
|
@@ -77,36 +91,56 @@ def comparator(options):
|
|
|
77
91
|
'Contig\tGenes\tORFs\tPerfect_Matches\tPartial_Matches\tMissed_Genes\tUnmatched_ORFs\tMulti_Matched_ORFs\n')
|
|
78
92
|
|
|
79
93
|
for dna_region, result in results.items():
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
contig_summaries.append([
|
|
90
|
-
|
|
91
|
-
])
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
94
|
+
if result:
|
|
95
|
+
num_current_genes = len(dna_regions[dna_region][2])
|
|
96
|
+
num_orfs = result['pred_metrics']['Number_of_ORFs']
|
|
97
|
+
num_perfect = result['pred_metrics']['Number_of_Perfect_Matches']
|
|
98
|
+
num_partial = len(result['pred_metrics']['partial_Hits'])
|
|
99
|
+
num_missed = len(result['rep_metrics']['genes_Undetected'])
|
|
100
|
+
num_unmatched = len(result['pred_metrics']['unmatched_ORFs'])
|
|
101
|
+
num_multi = len(result['pred_metrics']['multi_Matched_ORFs'])
|
|
102
|
+
# Collect summary for this contig
|
|
103
|
+
contig_summaries.append([dna_region, num_current_genes, num_orfs, num_perfect, num_partial, num_missed, num_unmatched, num_multi])
|
|
104
|
+
num_current_genes = len(dna_regions[dna_region][2])
|
|
105
|
+
genome_name = options.reference_annotation.split('/')[-1].split('.')[0]
|
|
106
|
+
rep_metric_description, rep_metrics = get_rep_metrics(result)
|
|
107
|
+
all_metric_description, all_metrics = get_all_metrics(result)
|
|
108
|
+
|
|
109
|
+
# Safely extract metric values
|
|
110
|
+
num_orfs = result.get('pred_metrics', {}).get('Number_of_ORFs') if isinstance(result, dict) else 'N/A'
|
|
111
|
+
perfect = result.get('pred_metrics', {}).get('Number_of_Perfect_Matches') if isinstance(result, dict) else 0
|
|
112
|
+
partial = len(result.get('pred_metrics', {}).get('partial_Hits', [])) if isinstance(result, dict) else 'N/A'
|
|
113
|
+
missed = len(result.get('rep_metrics', {}).get('genes_Undetected', [])) if isinstance(result, dict) else 'N/A'
|
|
114
|
+
unmatched = len(result.get('pred_metrics', {}).get('unmatched_ORFs', [])) if isinstance(result, dict) else 'N/A'
|
|
115
|
+
multi = len(result.get('pred_metrics', {}).get('multi_Matched_ORFs', [])) if isinstance(result, dict) else 'N/A'
|
|
116
|
+
|
|
117
|
+
lines = [
|
|
118
|
+
f"These are the results for: {dna_region}",
|
|
119
|
+
f"Current Contig: {dna_region}",
|
|
120
|
+
f"Number of Genes: {num_current_genes}",
|
|
121
|
+
f"Number of ORFs: {num_orfs}",
|
|
122
|
+
f"Perfect Matches: {perfect} [{num_current_genes}] - {_pct(perfect, num_current_genes) if isinstance(num_current_genes, (int, float)) else 'N/A'}",
|
|
123
|
+
f"Partial Matches: {partial} [{num_current_genes}] - {_pct(partial, num_current_genes) if isinstance(num_current_genes, (int, float)) else 'N/A'}",
|
|
124
|
+
f"Missed Genes: {missed} [{num_current_genes}] - {_pct(missed, num_current_genes) if isinstance(num_current_genes, (int, float)) else 'N/A'}",
|
|
125
|
+
f"Unmatched ORFs: {unmatched} [{num_current_genes}] - {_pct(unmatched, num_current_genes) if isinstance(num_current_genes, (int, float)) else 'N/A'}",
|
|
126
|
+
f"Multi-matched ORFs: {multi} [{num_current_genes}] - {_pct(multi, num_current_genes) if isinstance(num_current_genes, (int, float)) else 'N/A'}"
|
|
127
|
+
]
|
|
128
|
+
|
|
129
|
+
full_msg = '\n'.join(lines) + '\n'
|
|
130
|
+
if options.verbose:
|
|
131
|
+
print(full_msg)
|
|
132
|
+
options.output_logger.info(full_msg)
|
|
133
|
+
|
|
134
|
+
# print("These are the results for: " + dna_region + '\n')
|
|
135
|
+
# print('Current Contig: ' + str(dna_region))
|
|
136
|
+
# print('Number of Genes: ' + str(num_current_genes))
|
|
137
|
+
# print('Number of ORFs: ' + str(result['pred_metrics']['Number_of_ORFs']))
|
|
138
|
+
# print('Perfect Matches: ' + str(result['pred_metrics']['Number_of_Perfect_Matches']) + ' [' + str(num_current_genes)+ '] - '+ format(100 * result['pred_metrics']['Number_of_Perfect_Matches']/num_current_genes,'.2f')+'%')
|
|
139
|
+
# print('Partial Matches: ' + str(len(result['pred_metrics']['partial_Hits'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['partial_Hits'])/num_current_genes,'.2f')+'%')
|
|
140
|
+
# print('Missed Genes: ' + str(len(result['rep_metrics']['genes_Undetected'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['rep_metrics']['genes_Undetected'])/num_current_genes,'.2f')+'%')
|
|
141
|
+
# print('Unmatched ORFs: ' + str(len(result['pred_metrics']['unmatched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['unmatched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
142
|
+
# print('Multi-matched ORFs: ' + str(len(result['pred_metrics']['multi_Matched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['multi_Matched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
143
|
+
|
|
110
144
|
# Prepare output directory and file names for each contig
|
|
111
145
|
contig_save = dna_region.replace('/', '_').replace('\\', '_')
|
|
112
146
|
contig_dir = os.path.join(options.outdir, contig_save)
|
|
@@ -210,6 +244,11 @@ def comparator(options):
|
|
|
210
244
|
key_parts = key.split(',')
|
|
211
245
|
multi = f">Predicted_CDS:{key_parts[0]}-{key_parts[1]}_Genes:{'|'.join(value)}"
|
|
212
246
|
f.write(f"{multi}\n")
|
|
247
|
+
else:
|
|
248
|
+
if options.verbose:
|
|
249
|
+
print(f"No results to process for dna region - " + str(dna_region))
|
|
250
|
+
options.output_logger.info(f"No results to process for dna region - " + str(dna_region))
|
|
251
|
+
|
|
213
252
|
|
|
214
253
|
# After all contigs, append the summary table to the main summary file
|
|
215
254
|
if options.outdir and contig_summaries:
|
|
@@ -238,23 +277,21 @@ def comparator(options):
|
|
|
238
277
|
out_file.write(
|
|
239
278
|
f'Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%\n')
|
|
240
279
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
f
|
|
248
|
-
|
|
249
|
-
f
|
|
250
|
-
|
|
251
|
-
print(
|
|
252
|
-
f'Unmatched ORFs: {total_unmatched} [{total_genes}] - {format(100 * total_unmatched / total_genes, ".2f")}%')
|
|
253
|
-
print(
|
|
254
|
-
f'Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%')
|
|
255
|
-
|
|
256
|
-
|
|
280
|
+
lines = [
|
|
281
|
+
f"Combined metrics for all contigs:",
|
|
282
|
+
f"Number of Genes: {total_genes}",
|
|
283
|
+
f"Number of ORFs: {total_orfs}",
|
|
284
|
+
f"Perfect Matches: {total_perfect} [{total_genes}] - {format(100 * total_perfect / total_genes, ".2f")}%",
|
|
285
|
+
f"Partial Matches: {total_partial} [{total_genes}] - {format(100 * total_partial / total_genes, ".2f")}%",
|
|
286
|
+
f"Missed Genes: {total_missed} [{total_genes}] - {format(100 * total_missed / total_genes, ".2f")}%",
|
|
287
|
+
f"Unmatched ORFs: {total_unmatched} [{total_genes}] - {format(100 * total_unmatched / total_genes, ".2f")}%",
|
|
288
|
+
f"Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%"
|
|
289
|
+
]
|
|
257
290
|
|
|
291
|
+
full_msg = '\n'.join(lines) + '\n'
|
|
292
|
+
if options.verbose:
|
|
293
|
+
print(full_msg)
|
|
294
|
+
options.output_logger.info(full_msg)
|
|
258
295
|
|
|
259
296
|
|
|
260
297
|
def main():
|
|
@@ -282,18 +319,32 @@ def main():
|
|
|
282
319
|
'- Provide tool name to compare output from two tools')
|
|
283
320
|
|
|
284
321
|
output = parser.add_argument_group('Output')
|
|
285
|
-
output.add_argument('-o', dest='outdir', required=
|
|
286
|
-
help='Define directory where detailed output should be places
|
|
322
|
+
output.add_argument('-o', dest='outdir', required=True,
|
|
323
|
+
help='Define directory where detailed output should be places')
|
|
287
324
|
output.add_argument('-n', dest='outname', required=False,
|
|
288
|
-
help='Define output
|
|
325
|
+
help='Define output filename(s) prefix - If not provided, filename of reference '
|
|
326
|
+
'annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv')
|
|
289
327
|
|
|
290
328
|
misc = parser.add_argument_group('Misc')
|
|
291
329
|
misc.add_argument('-v', dest='verbose', default='False', type=eval, choices=[True, False],
|
|
292
330
|
help='Default - False: Print out runtime status')
|
|
293
331
|
options = parser.parse_args()
|
|
294
332
|
|
|
295
|
-
if options.
|
|
296
|
-
|
|
333
|
+
options.outname = options.outname if options.outname else options.reference_annotation.split('/')[-1].split('.')[0]
|
|
334
|
+
|
|
335
|
+
# Initialise loggers once and store on options
|
|
336
|
+
if not getattr(options, 'logger_initialized', False):
|
|
337
|
+
os.makedirs(options.outdir, exist_ok=True)
|
|
338
|
+
output_log = os.path.join(options.outdir, f"ORForise_{options.outname}_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
|
|
339
|
+
logger = logging.getLogger('ORForise.output')
|
|
340
|
+
logger.setLevel(logging.INFO)
|
|
341
|
+
fh_out = logging.FileHandler(output_log, encoding='utf-8')
|
|
342
|
+
fh_out.setFormatter(logging.Formatter('%(message)s'))
|
|
343
|
+
logger.addHandler(fh_out)
|
|
344
|
+
|
|
345
|
+
options.output_logger = logger
|
|
346
|
+
options.logger_initialized = True
|
|
347
|
+
|
|
297
348
|
|
|
298
349
|
comparator(options)
|
|
299
350
|
|
|
@@ -206,33 +206,53 @@ def start_Codon_Count(orfs):
|
|
|
206
206
|
else:
|
|
207
207
|
other += 1
|
|
208
208
|
other_Starts.append(codon)
|
|
209
|
-
atg_P = format(100 * atg / len(orfs), '.2f')
|
|
210
|
-
gtg_P = format(100 * gtg / len(orfs), '.2f')
|
|
211
|
-
ttg_P = format(100 * ttg / len(orfs), '.2f')
|
|
212
|
-
att_P = format(100 * att / len(orfs), '.2f')
|
|
213
|
-
ctg_P = format(100 * ctg / len(orfs), '.2f')
|
|
214
|
-
other_Start_P = format(100 * other / len(orfs), '.2f')
|
|
215
|
-
return atg_P, gtg_P, ttg_P, att_P, ctg_P, other_Start_P, other_Starts
|
|
216
209
|
|
|
210
|
+
total = len(orfs) if orfs is not None else 0
|
|
211
|
+
|
|
212
|
+
if total:
|
|
213
|
+
atg_P = format(100 * atg / len(orfs), '.2f')
|
|
214
|
+
gtg_P = format(100 * gtg / len(orfs), '.2f')
|
|
215
|
+
ttg_P = format(100 * ttg / len(orfs), '.2f')
|
|
216
|
+
att_P = format(100 * att / len(orfs), '.2f')
|
|
217
|
+
ctg_P = format(100 * ctg / len(orfs), '.2f')
|
|
218
|
+
other_Start_P = format(100 * other / len(orfs), '.2f')
|
|
219
|
+
else:
|
|
220
|
+
atg_P = ttg_P = gtg_P = ctg_P = att_P = other_Start_P = format(0, '.2f')
|
|
221
|
+
|
|
222
|
+
return {
|
|
223
|
+
'ATG': (atg, atg_P),
|
|
224
|
+
'TTG': (ttg, ttg_P),
|
|
225
|
+
'GTG': (gtg, gtg_P),
|
|
226
|
+
'CTG': (ctg, ctg_P),
|
|
227
|
+
'ATT': (att, att_P),
|
|
228
|
+
'Other': (other, other_Start_P),
|
|
229
|
+
'total': total
|
|
230
|
+
}
|
|
217
231
|
|
|
218
232
|
def stop_Codon_Count(orfs):
|
|
219
233
|
tag, taa, tga, other = 0, 0, 0, 0
|
|
220
234
|
other_Stops = []
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
|
|
230
|
-
|
|
231
|
-
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
235
|
+
|
|
236
|
+
total = len(orfs) if orfs else 0
|
|
237
|
+
if total:
|
|
238
|
+
for orf in orfs.values():
|
|
239
|
+
codon = orf[2]
|
|
240
|
+
if codon == 'TAG':
|
|
241
|
+
tag += 1
|
|
242
|
+
elif codon == 'TAA':
|
|
243
|
+
taa += 1
|
|
244
|
+
elif codon == 'TGA':
|
|
245
|
+
tga += 1
|
|
246
|
+
else:
|
|
247
|
+
other += 1
|
|
248
|
+
other_Stops.append(codon)
|
|
249
|
+
tag_p = format(100 * tag / len(orfs), '.2f')
|
|
250
|
+
taa_p = format(100 * taa / len(orfs), '.2f')
|
|
251
|
+
tga_p = format(100 * tga / len(orfs), '.2f')
|
|
252
|
+
other_Stop_P = format(100 * other / len(orfs), '.2f')
|
|
253
|
+
else:
|
|
254
|
+
tag_p = taa_p = tga_p = other_Stop_P = format(0, '.2f')
|
|
255
|
+
|
|
236
256
|
return tag_p, taa_p, tga_p, other_Stop_P, other_Stops
|
|
237
257
|
|
|
238
258
|
|
|
@@ -260,8 +280,8 @@ def candidate_ORF_Selection(gene_Set,
|
|
|
260
280
|
if len(current_ORF_Difference) > len(candidate_ORF_Difference):
|
|
261
281
|
pos = c_Pos
|
|
262
282
|
orf_Details = c_ORF_Details
|
|
263
|
-
else:
|
|
264
|
-
|
|
283
|
+
#else:
|
|
284
|
+
#("Match filtered out")
|
|
265
285
|
return pos, orf_Details
|
|
266
286
|
|
|
267
287
|
|
|
@@ -300,6 +320,11 @@ def tool_comparison(all_orfs, dna_regions, verbose):
|
|
|
300
320
|
|
|
301
321
|
ref_genes_list = dna_regions[dna_region][2]
|
|
302
322
|
ref_genes = collections.OrderedDict()
|
|
323
|
+
|
|
324
|
+
if not ref_genes_list:
|
|
325
|
+
results[dna_region] = {}
|
|
326
|
+
continue
|
|
327
|
+
|
|
303
328
|
for d in ref_genes_list:
|
|
304
329
|
ref_genes.update(d)
|
|
305
330
|
comp.genome_Seq = dna_regions[dna_region][0]
|
|
@@ -311,6 +336,10 @@ def tool_comparison(all_orfs, dna_regions, verbose):
|
|
|
311
336
|
|
|
312
337
|
better_pos_orfs_items = [[(int(pos.split(',')[0]), int(pos.split(',')[1])), orf_Details] for pos, orf_Details in current_orfs.items()] #TODO: turn pos into tuple instead of string everywhere
|
|
313
338
|
|
|
339
|
+
if not current_orfs or not better_pos_orfs_items:
|
|
340
|
+
results[dna_region] = {}
|
|
341
|
+
continue
|
|
342
|
+
|
|
314
343
|
for gene_num, gene_details in ref_genes.items(): # Loop through each gene to compare against predicted ORFs
|
|
315
344
|
g_Start = int(gene_details[0])
|
|
316
345
|
g_Stop = int(gene_details[1])
|
|
@@ -477,10 +506,13 @@ def tool_comparison(all_orfs, dna_regions, verbose):
|
|
|
477
506
|
comp.gene_Pos_Olap.append(0)
|
|
478
507
|
elif '-' in g_Strand:
|
|
479
508
|
comp.gene_Neg_Olap.append(0)
|
|
480
|
-
####
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
509
|
+
#### avoid ValueError
|
|
510
|
+
if comp.gene_Lengths:
|
|
511
|
+
min_Gene_Length = min(comp.gene_Lengths)
|
|
512
|
+
max_Gene_Length = max(comp.gene_Lengths)
|
|
513
|
+
median_Gene_Length = np.median(comp.gene_Lengths)
|
|
514
|
+
else:
|
|
515
|
+
min_Gene_Length = max_Gene_Length = min_Length_Difference = 0
|
|
484
516
|
prev_ORF_Stop = 0
|
|
485
517
|
prev_ORF_Overlapped = False
|
|
486
518
|
for o_Positions, orf_Details in current_orfs.items():
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.5.0
|
|
3
|
+
Version: 1.5.1
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -57,13 +57,7 @@ Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-I
|
|
|
57
57
|
For Help: ```Annotation-Compare -h ```
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
|
|
61
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
62
|
-
#####
|
|
63
|
-
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
64
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
65
|
-
|
|
66
|
-
ORForise v1.5.0: Annotatione-Compare Run Parameters.
|
|
60
|
+
ORForise v1.5.1: Annotatione-Compare Run Parameters.
|
|
67
61
|
|
|
68
62
|
Required Arguments:
|
|
69
63
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -78,8 +72,8 @@ Optional Arguments:
|
|
|
78
72
|
name to compare output from two tools
|
|
79
73
|
|
|
80
74
|
Output:
|
|
81
|
-
-o
|
|
82
|
-
|
|
75
|
+
-o OUTDIR Define directory where detailed output should be places
|
|
76
|
+
-n OUTNAME Define output filename(s) prefix - If not provided, filename of reference annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv
|
|
83
77
|
|
|
84
78
|
Misc:
|
|
85
79
|
-v {True,False} Default - False: Print out runtime status
|
|
@@ -107,13 +101,7 @@ ORForise can be used as the example below.
|
|
|
107
101
|
For Help: ```Aggregate-Compare -h ```
|
|
108
102
|
|
|
109
103
|
```python
|
|
110
|
-
|
|
111
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
112
|
-
#####
|
|
113
|
-
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
114
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
115
|
-
|
|
116
|
-
ORForise v1.5.0: Aggregate-Compare Run Parameters.
|
|
104
|
+
ORForise v1.5.1: Aggregate-Compare Run Parameters.
|
|
117
105
|
|
|
118
106
|
Required Arguments:
|
|
119
107
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -261,13 +249,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
261
249
|
For Help: ```GFF-Adder -h ```
|
|
262
250
|
|
|
263
251
|
```python
|
|
264
|
-
|
|
265
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
266
|
-
#####
|
|
267
|
-
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
268
|
-
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
269
|
-
|
|
270
|
-
ORForise v1.5.0: GFF-Adder Run Parameters.
|
|
252
|
+
ORForise v1.5.1: GFF-Adder Run Parameters.
|
|
271
253
|
|
|
272
254
|
Required Arguments:
|
|
273
255
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -323,13 +305,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
323
305
|
|
|
324
306
|
For Help: ```GFF-Intersector -h ```
|
|
325
307
|
```python
|
|
326
|
-
|
|
327
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
328
|
-
#####
|
|
329
|
-
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
330
|
-
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
331
|
-
|
|
332
|
-
ORForise v1.5.0: GFF-Intersector Run Parameters.
|
|
308
|
+
ORForise v1.5.1: GFF-Intersector Run Parameters.
|
|
333
309
|
|
|
334
310
|
Required Arguments:
|
|
335
311
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{orforise-1.5.0 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{orforise-1.5.0 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|