ORForise 1.4.3__tar.gz → 1.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orforise-1.4.3 → orforise-1.5.1}/PKG-INFO +7 -31
- {orforise-1.4.3 → orforise-1.5.1}/README.md +6 -30
- {orforise-1.4.3 → orforise-1.5.1}/setup.cfg +6 -1
- orforise-1.5.1/src/ORForise/Aggregate_Compare.py +382 -0
- orforise-1.5.1/src/ORForise/Annotation_Compare.py +353 -0
- orforise-1.5.1/src/ORForise/Comparator.py +881 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/genome_Metrics.py +51 -33
- orforise-1.5.1/src/ORForise/Tools/Augustus/Augustus.py +42 -0
- orforise-1.5.1/src/ORForise/Tools/Balrog/Balrog.py +44 -0
- orforise-1.4.3/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py → orforise-1.5.1/src/ORForise/Tools/EasyGene/EasyGene.py +19 -13
- orforise-1.5.1/src/ORForise/Tools/FGENESB/FGENESB.py +45 -0
- orforise-1.5.1/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +42 -0
- orforise-1.5.1/src/ORForise/Tools/GFF/GFF.py +66 -0
- orforise-1.5.1/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +47 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark/GeneMark.py +46 -40
- orforise-1.5.1/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +42 -0
- orforise-1.4.3/src/ORForise/Tools/Balrog/Balrog.py → orforise-1.5.1/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +19 -12
- orforise-1.4.3/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py → orforise-1.5.1/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +19 -13
- orforise-1.4.3/src/ORForise/Tools/Prokka/Prokka.py → orforise-1.5.1/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +17 -14
- orforise-1.5.1/src/ORForise/Tools/MetaGene/MetaGene.py +42 -0
- orforise-1.5.1/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +43 -0
- orforise-1.5.1/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +43 -0
- orforise-1.5.1/src/ORForise/Tools/Prodigal/Prodigal.py +43 -0
- orforise-1.5.1/src/ORForise/Tools/Prokka/Prokka.py +45 -0
- orforise-1.5.1/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +44 -0
- orforise-1.5.1/src/ORForise/Tools/TransDecoder/TransDecoder.py +42 -0
- orforise-1.5.1/src/ORForise/utils.py +233 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/PKG-INFO +7 -31
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/entry_points.txt +5 -0
- orforise-1.4.3/src/ORForise/Aggregate_Compare.py +0 -197
- orforise-1.4.3/src/ORForise/Annotation_Compare.py +0 -184
- orforise-1.4.3/src/ORForise/Comparator.py +0 -801
- orforise-1.4.3/src/ORForise/Tools/Augustus/Augustus.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/EasyGene/EasyGene.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/FGENESB/FGENESB.py +0 -38
- orforise-1.4.3/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/GFF/GFF.py +0 -62
- orforise-1.4.3/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +0 -40
- orforise-1.4.3/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -36
- orforise-1.4.3/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -39
- orforise-1.4.3/src/ORForise/Tools/MetaGene/MetaGene.py +0 -35
- orforise-1.4.3/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -36
- orforise-1.4.3/src/ORForise/Tools/Prodigal/Prodigal.py +0 -39
- orforise-1.4.3/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -35
- orforise-1.4.3/src/ORForise/utils.py +0 -31
- {orforise-1.4.3 → orforise-1.5.1}/LICENSE +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/pyproject.toml +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/GFF_Adder.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/GFF_Intersector.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/StORForise.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/Augustus/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/Balrog/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/EasyGene/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/FGENESB/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/FragGeneScan/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GFF/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GLIMMER_3/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HA/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_HMM/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/GeneMark_S_2/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/MetaGene/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/Prodigal/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/Prokka/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Reporter/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/StORF_Undetected.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/Tools/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise/__init__.py +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/SOURCES.txt +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/dependency_links.txt +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/requires.txt +0 -0
- {orforise-1.4.3 → orforise-1.5.1}/src/ORForise.egg-info/top_level.txt +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.3
|
|
3
|
+
Version: 1.5.1
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -57,13 +57,7 @@ Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-I
|
|
|
57
57
|
For Help: ```Annotation-Compare -h ```
|
|
58
58
|
|
|
59
59
|
```python
|
|
60
|
-
|
|
61
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
62
|
-
#####
|
|
63
|
-
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
64
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
65
|
-
|
|
66
|
-
ORForise v1.4.3: Annotatione-Compare Run Parameters.
|
|
60
|
+
ORForise v1.5.1: Annotation-Compare Run Parameters.
|
|
67
61
|
|
|
68
62
|
Required Arguments:
|
|
69
63
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -78,8 +72,8 @@ Optional Arguments:
|
|
|
78
72
|
name to compare output from two tools
|
|
79
73
|
|
|
80
74
|
Output:
|
|
81
|
-
-o
|
|
82
|
-
|
|
75
|
+
-o OUTDIR Define directory where detailed output should be placed
|
|
76
|
+
-n OUTNAME Define output filename(s) prefix - If not provided, filename of reference annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv
|
|
83
77
|
|
|
84
78
|
Misc:
|
|
85
79
|
-v {True,False} Default - False: Print out runtime status
|
|
@@ -107,13 +101,7 @@ ORForise can be used as the example below.
|
|
|
107
101
|
For Help: ```Aggregate-Compare -h ```
|
|
108
102
|
|
|
109
103
|
```python
|
|
110
|
-
|
|
111
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
112
|
-
#####
|
|
113
|
-
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
114
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
115
|
-
|
|
116
|
-
ORForise v1.4.3: Aggregate-Compare Run Parameters.
|
|
104
|
+
ORForise v1.5.1: Aggregate-Compare Run Parameters.
|
|
117
105
|
|
|
118
106
|
Required Arguments:
|
|
119
107
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -261,13 +249,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
261
249
|
For Help: ```GFF-Adder -h ```
|
|
262
250
|
|
|
263
251
|
```python
|
|
264
|
-
|
|
265
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
266
|
-
#####
|
|
267
|
-
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
268
|
-
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
269
|
-
|
|
270
|
-
ORForise v1.4.3: GFF-Adder Run Parameters.
|
|
252
|
+
ORForise v1.5.1: GFF-Adder Run Parameters.
|
|
271
253
|
|
|
272
254
|
Required Arguments:
|
|
273
255
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -323,13 +305,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
323
305
|
|
|
324
306
|
For Help: ```GFF-Intersector -h ```
|
|
325
307
|
```python
|
|
326
|
-
|
|
327
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
328
|
-
#####
|
|
329
|
-
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
330
|
-
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
331
|
-
|
|
332
|
-
ORForise v1.4.3: GFF-Intersector Run Parameters.
|
|
308
|
+
ORForise v1.5.1: GFF-Intersector Run Parameters.
|
|
333
309
|
|
|
334
310
|
Required Arguments:
|
|
335
311
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -40,13 +40,7 @@ Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-I
|
|
|
40
40
|
For Help: ```Annotation-Compare -h ```
|
|
41
41
|
|
|
42
42
|
```python
|
|
43
|
-
|
|
44
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
45
|
-
#####
|
|
46
|
-
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
47
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
48
|
-
|
|
49
|
-
ORForise v1.4.3: Annotatione-Compare Run Parameters.
|
|
43
|
+
ORForise v1.5.1: Annotation-Compare Run Parameters.
|
|
50
44
|
|
|
51
45
|
Required Arguments:
|
|
52
46
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -61,8 +55,8 @@ Optional Arguments:
|
|
|
61
55
|
name to compare output from two tools
|
|
62
56
|
|
|
63
57
|
Output:
|
|
64
|
-
-o
|
|
65
|
-
|
|
58
|
+
-o OUTDIR Define directory where detailed output should be placed
|
|
59
|
+
-n OUTNAME Define output filename(s) prefix - If not provided, filename of reference annotation file will be used- <outname>_<contig_id>_ORF_Comparison.csv
|
|
66
60
|
|
|
67
61
|
Misc:
|
|
68
62
|
-v {True,False} Default - False: Print out runtime status
|
|
@@ -90,13 +84,7 @@ ORForise can be used as the example below.
|
|
|
90
84
|
For Help: ```Aggregate-Compare -h ```
|
|
91
85
|
|
|
92
86
|
```python
|
|
93
|
-
|
|
94
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
95
|
-
#####
|
|
96
|
-
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
97
|
-
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
98
|
-
|
|
99
|
-
ORForise v1.4.3: Aggregate-Compare Run Parameters.
|
|
87
|
+
ORForise v1.5.1: Aggregate-Compare Run Parameters.
|
|
100
88
|
|
|
101
89
|
Required Arguments:
|
|
102
90
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -244,13 +232,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
244
232
|
For Help: ```GFF-Adder -h ```
|
|
245
233
|
|
|
246
234
|
```python
|
|
247
|
-
|
|
248
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
249
|
-
#####
|
|
250
|
-
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
251
|
-
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
252
|
-
|
|
253
|
-
ORForise v1.4.3: GFF-Adder Run Parameters.
|
|
235
|
+
ORForise v1.5.1: GFF-Adder Run Parameters.
|
|
254
236
|
|
|
255
237
|
Required Arguments:
|
|
256
238
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -306,13 +288,7 @@ The ```-gi``` option can be used to allow for different genomic elements to be a
|
|
|
306
288
|
|
|
307
289
|
For Help: ```GFF-Intersector -h ```
|
|
308
290
|
```python
|
|
309
|
-
|
|
310
|
-
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
311
|
-
#####
|
|
312
|
-
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
313
|
-
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
314
|
-
|
|
315
|
-
ORForise v1.4.3: GFF-Intersector Run Parameters.
|
|
291
|
+
ORForise v1.5.1: GFF-Intersector Run Parameters.
|
|
316
292
|
|
|
317
293
|
Required Arguments:
|
|
318
294
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[metadata]
|
|
2
2
|
name = ORForise
|
|
3
|
-
version = 1.4.3
|
|
3
|
+
version = 1.5.1
|
|
4
4
|
author = Nicholas Dimonaco
|
|
5
5
|
author_email = nicholas@dimonaco.co.uk
|
|
6
6
|
description = ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
@@ -29,10 +29,15 @@ include = *
|
|
|
29
29
|
[options.entry_points]
|
|
30
30
|
console_scripts =
|
|
31
31
|
Annotation-Compare = ORForise.Annotation_Compare:main
|
|
32
|
+
annotation-compare = ORForise.Annotation_Compare:main
|
|
32
33
|
Aggregate-Compare = ORForise.Aggregate_Compare:main
|
|
34
|
+
aggregate-compare = ORForise.Aggregate_Compare:main
|
|
33
35
|
StORForise = ORForise.StORForise:main
|
|
36
|
+
storforise = ORForise.StORForise:main
|
|
34
37
|
GFF-Adder = ORForise.GFF_Adder:main
|
|
38
|
+
gff-adder = ORForise.GFF_Adder:main
|
|
35
39
|
GFF-Intersector = ORForise.GFF_Intersector:main
|
|
40
|
+
gff-intersector = ORForise.GFF_Intersector:main
|
|
36
41
|
|
|
37
42
|
[egg_info]
|
|
38
43
|
tag_build =
|
|
@@ -0,0 +1,382 @@
|
|
|
1
|
+
from importlib import import_module
|
|
2
|
+
import argparse
|
|
3
|
+
import csv, os, gzip, sys
|
|
4
|
+
|
|
5
|
+
|
|
6
|
+
try:
|
|
7
|
+
from Comparator import tool_comparison
|
|
8
|
+
from utils import *
|
|
9
|
+
except ImportError:
|
|
10
|
+
from .Comparator import tool_comparison
|
|
11
|
+
from .utils import *
|
|
12
|
+
|
|
13
|
+
############################################
|
|
14
|
+
|
|
15
|
+
def comparator(options):
|
|
16
|
+
try:
|
|
17
|
+
try: # Detect whether fasta/gff files are .gz or text and read accordingly
|
|
18
|
+
fasta_in = gzip.open(options.genome_dna, 'rt')
|
|
19
|
+
dna_regions = fasta_load(fasta_in)
|
|
20
|
+
except:
|
|
21
|
+
fasta_in = open(options.genome_dna, 'r', encoding='unicode_escape')
|
|
22
|
+
dna_regions = fasta_load(fasta_in)
|
|
23
|
+
try:
|
|
24
|
+
gff_in = gzip.open(options.reference_annotation, 'rt')
|
|
25
|
+
dna_regions = gff_load(options, gff_in, dna_regions)
|
|
26
|
+
except:
|
|
27
|
+
gff_in = open(options.reference_annotation, 'r', encoding='unicode_escape')
|
|
28
|
+
dna_regions = gff_load(options, gff_in, dna_regions)
|
|
29
|
+
except AttributeError:
|
|
30
|
+
sys.exit("Attribute Error:\nStORF'ed GFF probably already exists - Must be deleted before running (-overwrite)")
|
|
31
|
+
except FileNotFoundError:
|
|
32
|
+
split_path = options.gff.split(os.sep)
|
|
33
|
+
sys.exit("Directory '" + split_path[-2] + "' missing fna/gff files")
|
|
34
|
+
###############################################
|
|
35
|
+
total_ref_genes = sum(
|
|
36
|
+
len(v[2]) if isinstance(v[2], (list, tuple, set, dict, str)) else 1 for v in dna_regions.values())
|
|
37
|
+
#############################################
|
|
38
|
+
# Collect predictions from tools
|
|
39
|
+
aggregate_Predictions = collections.OrderedDict()
|
|
40
|
+
aggregate_Tools = options.tools.split(',')
|
|
41
|
+
for i, (tool) in enumerate(aggregate_Tools):
|
|
42
|
+
tool_prediction = options.tool_predictions.split(',')[i]
|
|
43
|
+
print(tool)
|
|
44
|
+
try:
|
|
45
|
+
tool_ = import_module('Tools.' + tool + '.' + tool, package='my_current_pkg')
|
|
46
|
+
except ModuleNotFoundError:
|
|
47
|
+
try:
|
|
48
|
+
tool_ = import_module('ORForise.Tools.' + tool + '.' + tool, package='my_current_pkg')
|
|
49
|
+
except ModuleNotFoundError:
|
|
50
|
+
sys.exit("Tool not available")
|
|
51
|
+
tool_ = getattr(tool_, tool)
|
|
52
|
+
##
|
|
53
|
+
orfs = tool_(tool_prediction, dna_regions)
|
|
54
|
+
for current_contig in orfs:
|
|
55
|
+
if current_contig not in aggregate_Predictions:
|
|
56
|
+
aggregate_Predictions[current_contig] = {}
|
|
57
|
+
current_orfs = orfs[current_contig]
|
|
58
|
+
for key, value in current_orfs.items():
|
|
59
|
+
if key in aggregate_Predictions[current_contig]:
|
|
60
|
+
aggregate_Predictions[current_contig][key][-1] += '|' + tool
|
|
61
|
+
else:
|
|
62
|
+
aggregate_Predictions[current_contig][key] = value
|
|
63
|
+
|
|
64
|
+
aggregate_ORFs = {k: sortORFs(v) for k, v in aggregate_Predictions.items()}
|
|
65
|
+
results = tool_comparison(aggregate_ORFs, dna_regions, options.verbose)
|
|
66
|
+
############## Printing to std-out and optional csv file
|
|
67
|
+
# Ensure the output directory exists
|
|
68
|
+
os.makedirs(options.outdir, exist_ok=True)
|
|
69
|
+
# Use outname as a directory, basename for files is output-outname
|
|
70
|
+
base_out = os.path.join(options.outdir, f"{os.path.basename(options.outname)}")
|
|
71
|
+
|
|
72
|
+
# Prepare to collect summary stats for all contigs
|
|
73
|
+
contig_summaries = []
|
|
74
|
+
############################################# To get default output filename from input file details
|
|
75
|
+
if options.outdir:
|
|
76
|
+
# Ensure the output directory exists
|
|
77
|
+
os.makedirs(options.outdir, exist_ok=True)
|
|
78
|
+
# Use outname as a directory, basename for files is output-outname
|
|
79
|
+
base_out = os.path.join(options.outdir, f"{os.path.basename(options.outname)}")
|
|
80
|
+
with open(f"{base_out}_summary.txt", 'w', encoding='utf-8') as out_file:
|
|
81
|
+
out_file.write('Genome Used: ' + str(options.genome_dna.split('/')[-1]) + '\n')
|
|
82
|
+
if options.reference_tool:
|
|
83
|
+
out_file.write('Reference Tool Used: ' + str(options.reference_tool) + '\n')
|
|
84
|
+
else:
|
|
85
|
+
out_file.write('Reference Used: ' + str(options.reference_annotation.split('/')[-1]) + '\n')
|
|
86
|
+
out_file.write('Tool Compared: ' + str(options.tools) + '\n')
|
|
87
|
+
out_file.write('Total Number of Reference Genes: ' + str(total_ref_genes) + '\n')
|
|
88
|
+
out_file.write('Number of Contigs: ' + str(len(dna_regions)) + '\n')
|
|
89
|
+
out_file.write(
|
|
90
|
+
'Contig\tGenes\tORFs\tPerfect_Matches\tPartial_Matches\tMissed_Genes\tUnmatched_ORFs\tMulti_Matched_ORFs\n')
|
|
91
|
+
|
|
92
|
+
for dna_region, result in results.items():
|
|
93
|
+
num_current_genes = len(dna_regions[dna_region][2])
|
|
94
|
+
num_orfs = result['pred_metrics']['Number_of_ORFs']
|
|
95
|
+
num_perfect = result['pred_metrics']['Number_of_Perfect_Matches']
|
|
96
|
+
num_partial = len(result['pred_metrics']['partial_Hits'])
|
|
97
|
+
num_missed = len(result['rep_metrics']['genes_Undetected'])
|
|
98
|
+
num_unmatched = len(result['pred_metrics']['unmatched_ORFs'])
|
|
99
|
+
num_multi = len(result['pred_metrics']['multi_Matched_ORFs'])
|
|
100
|
+
|
|
101
|
+
####
|
|
102
|
+
# Tool-specific stats
|
|
103
|
+
tool_stats = {}
|
|
104
|
+
for tool in options.tools.split(','):
|
|
105
|
+
tool_stats[tool] = {
|
|
106
|
+
'perfect': 0,
|
|
107
|
+
'partial': 0,
|
|
108
|
+
'unmatched': 0,
|
|
109
|
+
'multi': 0
|
|
110
|
+
}
|
|
111
|
+
# Count perfect matches per tool
|
|
112
|
+
for key in result['pred_metrics'].get('perfect_Matches', {}):
|
|
113
|
+
for tool in options.tools.split(','):
|
|
114
|
+
if tool in key:
|
|
115
|
+
tool_stats[tool]['perfect'] += 1
|
|
116
|
+
# Count partial matches per tool
|
|
117
|
+
for key in result['pred_metrics'].get('partial_Hits', {}):
|
|
118
|
+
for tool in options.tools.split(','):
|
|
119
|
+
if tool in key:
|
|
120
|
+
tool_stats[tool]['partial'] += 1
|
|
121
|
+
# Count unmatched ORFs per tool
|
|
122
|
+
for key in result['pred_metrics'].get('unmatched_ORFs', {}):
|
|
123
|
+
for tool in options.tools.split(','):
|
|
124
|
+
if tool in key:
|
|
125
|
+
tool_stats[tool]['unmatched'] += 1
|
|
126
|
+
# Count multi-matched ORFs per tool
|
|
127
|
+
for key in result['pred_metrics'].get('multi_Matched_ORFs', {}):
|
|
128
|
+
for tool in options.tools.split(','):
|
|
129
|
+
if tool in key:
|
|
130
|
+
tool_stats[tool]['multi'] += 1
|
|
131
|
+
####
|
|
132
|
+
|
|
133
|
+
# Collect summary for this contig
|
|
134
|
+
if options.outdir:
|
|
135
|
+
contig_summaries.append([
|
|
136
|
+
dna_region, num_current_genes, num_orfs, num_perfect, num_partial, num_missed, num_unmatched, num_multi
|
|
137
|
+
])
|
|
138
|
+
###
|
|
139
|
+
num_current_genes = len(dna_regions[dna_region][2])
|
|
140
|
+
print("These are the results for: " + dna_region + '\n')
|
|
141
|
+
############################################# To get default output filename from input file details
|
|
142
|
+
genome_name = options.reference_annotation.split('/')[-1].split('.')[0]
|
|
143
|
+
rep_metric_description, rep_metrics = get_rep_metrics(result)
|
|
144
|
+
all_metric_description, all_metrics = get_all_metrics(result)
|
|
145
|
+
|
|
146
|
+
print('Current Contig: ' + str(dna_region))
|
|
147
|
+
print('Number of Genes: ' + str(num_current_genes))
|
|
148
|
+
print('Number of ORFs: ' + str(result['pred_metrics']['Number_of_ORFs']))
|
|
149
|
+
print('Perfect Matches: ' + str(result['pred_metrics']['Number_of_Perfect_Matches']) + ' [' + str(num_current_genes)+ '] - '+ format(100 * result['pred_metrics']['Number_of_Perfect_Matches']/num_current_genes,'.2f')+'%')
|
|
150
|
+
print('Partial Matches: ' + str(len(result['pred_metrics']['partial_Hits'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['partial_Hits'])/num_current_genes,'.2f')+'%')
|
|
151
|
+
print('Missed Genes: ' + str(len(result['rep_metrics']['genes_Undetected'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['rep_metrics']['genes_Undetected'])/num_current_genes,'.2f')+'%')
|
|
152
|
+
print('Unmatched ORFs: ' + str(len(result['pred_metrics']['unmatched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['unmatched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
153
|
+
print('Multi-matched ORFs: ' + str(len(result['pred_metrics']['multi_Matched_ORFs'])) + ' [' + str(num_current_genes)+ '] - '+ format(100 * len(result['pred_metrics']['multi_Matched_ORFs'])/num_current_genes,'.2f')+'%')
|
|
154
|
+
print('Tool breakdown:')
|
|
155
|
+
for tool, stats in tool_stats.items():
|
|
156
|
+
print(
|
|
157
|
+
f" {tool}: Perfect={stats['perfect']}, Partial={stats['partial']}, Unmatched={stats['unmatched']}, Multi-matched={stats['multi']}")
|
|
158
|
+
|
|
159
|
+
if options.outdir:
|
|
160
|
+
# Prepare output directory and file names for each contig
|
|
161
|
+
contig_save = dna_region.replace('/', '_').replace('\\', '_')
|
|
162
|
+
contig_dir = os.path.join(options.outdir, contig_save)
|
|
163
|
+
os.makedirs(contig_dir, exist_ok=True)
|
|
164
|
+
summary_file = os.path.join(contig_dir, "summary.txt")
|
|
165
|
+
csv_file = os.path.join(contig_dir, "metrics.csv")
|
|
166
|
+
perfect_fasta = os.path.join(contig_dir, "perfect_matches.fasta")
|
|
167
|
+
partial_fasta = os.path.join(contig_dir, "partial_matches.fasta")
|
|
168
|
+
missed_fasta = os.path.join(contig_dir, "missed_genes.fasta")
|
|
169
|
+
unmatched_fasta = os.path.join(contig_dir, "unmatched_orfs.fasta")
|
|
170
|
+
multi_fasta = os.path.join(contig_dir, "multi_matched_orfs.fasta")
|
|
171
|
+
|
|
172
|
+
# Write summary to text file
|
|
173
|
+
with open(summary_file, 'w', encoding='utf-8') as sf:
|
|
174
|
+
sf.write('Current Contig: ' + str(dna_region) + '\n')
|
|
175
|
+
sf.write('Number of Genes: ' + str(num_current_genes) + '\n')
|
|
176
|
+
sf.write('Number of ORFs: ' + str(result['pred_metrics']['Number_of_ORFs']) + '\n')
|
|
177
|
+
sf.write('Perfect Matches: ' + str(result['pred_metrics']['Number_of_Perfect_Matches']) + ' [' + str(
|
|
178
|
+
num_current_genes) + '] - ' + format(
|
|
179
|
+
100 * result['pred_metrics']['Number_of_Perfect_Matches'] / num_current_genes, '.2f') + '%\n')
|
|
180
|
+
sf.write('Partial Matches: ' + str(len(result['pred_metrics']['partial_Hits'])) + ' [' + str(
|
|
181
|
+
num_current_genes) + '] - ' + format(
|
|
182
|
+
100 * len(result['pred_metrics']['partial_Hits']) / num_current_genes, '.2f') + '%\n')
|
|
183
|
+
sf.write('Missed Genes: ' + str(len(result['rep_metrics']['genes_Undetected'])) + ' [' + str(
|
|
184
|
+
num_current_genes) + '] - ' + format(
|
|
185
|
+
100 * len(result['rep_metrics']['genes_Undetected']) / num_current_genes, '.2f') + '%\n')
|
|
186
|
+
sf.write('Unmatched ORFs: ' + str(len(result['pred_metrics']['unmatched_ORFs'])) + ' [' + str(
|
|
187
|
+
num_current_genes) + '] - ' + format(
|
|
188
|
+
100 * len(result['pred_metrics']['unmatched_ORFs']) / num_current_genes, '.2f') + '%\n')
|
|
189
|
+
sf.write('Multi-matched ORFs: ' + str(len(result['pred_metrics']['multi_Matched_ORFs'])) + ' [' + str(
|
|
190
|
+
num_current_genes) + '] - ' + format(
|
|
191
|
+
100 * len(result['pred_metrics']['multi_Matched_ORFs']) / num_current_genes, '.2f') + '%\n')
|
|
192
|
+
sf.write('Tool breakdown:\n')
|
|
193
|
+
for tool, stats in tool_stats.items():
|
|
194
|
+
sf.write(
|
|
195
|
+
f" {tool}: Perfect={stats['perfect']}, Partial={stats['partial']}, Unmatched={stats['unmatched']}, Multi-matched={stats['multi']}\n")
|
|
196
|
+
|
|
197
|
+
# Write metrics to CSV
|
|
198
|
+
with open(csv_file, 'w', newline='\n', encoding='utf-8') as out_file:
|
|
199
|
+
tool_out = csv.writer(out_file, quoting=csv.QUOTE_NONE, escapechar=" ")
|
|
200
|
+
tool_out.writerow(['Representative_Metrics:'])
|
|
201
|
+
tool_out.writerow(rep_metric_description.split(','))
|
|
202
|
+
tool_out.writerow([*rep_metrics])
|
|
203
|
+
tool_out.writerow(['Prediction_Metrics:'])
|
|
204
|
+
tool_out.writerow(all_metric_description.split(','))
|
|
205
|
+
tool_out.writerow([*all_metrics])
|
|
206
|
+
tool_out.writerow(['Reference_CDS_Gene_Coverage_of_Genome'])
|
|
207
|
+
tool_out.writerow([''.join(map(str, result['rep_metrics']['gene_Coverage_Genome']))])
|
|
208
|
+
tool_out.writerow(['Predicted_CDS_Coverage_of_Genome'])
|
|
209
|
+
tool_out.writerow([''.join(map(str, result['pred_metrics']['orf_Coverage_Genome']))])
|
|
210
|
+
tool_out.writerow(['Matched_Predicted_CDS_Coverage_of_Genome'])
|
|
211
|
+
tool_out.writerow([''.join(map(str, result['pred_metrics']['matched_ORF_Coverage_Genome']))])
|
|
212
|
+
# tool_out.writerow(['Start_Position_Difference:'])
|
|
213
|
+
# tool_out.writerow(result.get('start_Difference', []))
|
|
214
|
+
# tool_out.writerow(['Stop_Position_Difference:'])
|
|
215
|
+
# tool_out.writerow(result.get('stop_Difference', []))
|
|
216
|
+
# tool_out.writerow(['Alternative_Starts_Predicted:'])
|
|
217
|
+
# tool_out.writerow(result.get('other_Starts', []))
|
|
218
|
+
# tool_out.writerow(['Alternative_Stops_Predicted:'])
|
|
219
|
+
# tool_out.writerow(result.get('other_Stops', []))
|
|
220
|
+
# tool_out.writerow(['Undetected_Gene_Metrics:'])
|
|
221
|
+
# tool_out.writerow([
|
|
222
|
+
# 'ATG_Start,GTG_Start,TTG_Start,ATT_Start,CTG_Start,Alternative_Start_Codon,TGA_Stop,TAA_Stop,TAG_Stop,Alternative_Stop_Codon,Median_Length,ORFs_on_Positive_Strand,ORFs_on_Negative_Strand'
|
|
223
|
+
# ])
|
|
224
|
+
# tool_out.writerow(result.get('undetected_Gene_Metrics', []))
|
|
225
|
+
# tool_out.writerow(['\nPredicted_CDSs_Without_Corresponding_Gene_In_Reference_Metrics:'])
|
|
226
|
+
# tool_out.writerow([
|
|
227
|
+
# 'ATG_Start,GTG_Start,TTG_Start,ATT_Start,CTG_Start,Alternative_Start_Codon,TGA_Stop,TAA_Stop,TAG_Stop,Alternative_Stop_Codon,Median_Length,ORFs_on_Positive_Strand,ORFs_on_Negative_Strand'
|
|
228
|
+
# ])
|
|
229
|
+
# tool_out.writerow(result.get('unmatched_ORF_Metrics', []))
|
|
230
|
+
|
|
231
|
+
# Write perfect matches to FASTA
|
|
232
|
+
with open(perfect_fasta, 'w', encoding='utf-8') as f:
|
|
233
|
+
for key, value in result['pred_metrics'].get('perfect_Matches', {}).items():
|
|
234
|
+
key_parts = key.split(',')
|
|
235
|
+
id = f">{genome_name}_{key_parts[0]}_{key_parts[1]}_{key_parts[2]}_{key_parts[5]}"
|
|
236
|
+
f.write(f"{id}\n{value}\n")
|
|
237
|
+
|
|
238
|
+
# Write partial matches to FASTA
|
|
239
|
+
with open(partial_fasta, 'w', encoding='utf- 8') as f:
|
|
240
|
+
for key, value in result['pred_metrics'].get('partial_Hits', {}).items():
|
|
241
|
+
key_parts = key.split(';')
|
|
242
|
+
gene_Seq = value[0]
|
|
243
|
+
orf_Seq = value[1]
|
|
244
|
+
f.write(f">{key_parts[0]}_gene\n{gene_Seq}\n>{key_parts[1]}_orf\n{orf_Seq}\n")
|
|
245
|
+
|
|
246
|
+
# Write missed genes to FASTA
|
|
247
|
+
with open(missed_fasta, 'w', encoding='utf-8') as f:
|
|
248
|
+
for key, value in result['rep_metrics'].get('genes_Undetected', {}).items():
|
|
249
|
+
key_parts = key.split(',')
|
|
250
|
+
id = f">{genome_name}_{key_parts[0]}_{key_parts[1]}_{key_parts[2]}"
|
|
251
|
+
f.write(f"{id}\n{value}\n")
|
|
252
|
+
|
|
253
|
+
# Write unmatched ORFs to FASTA
|
|
254
|
+
with open(unmatched_fasta, 'w', encoding='utf-8') as f:
|
|
255
|
+
for key, value in result['pred_metrics'].get('unmatched_ORFs', {}).items():
|
|
256
|
+
key_parts = key.split(',')
|
|
257
|
+
id = f">{options.tools}_{key_parts[0]}_{key_parts[1]}_{key_parts[2]}"
|
|
258
|
+
f.write(f"{id}\n{value}\n")
|
|
259
|
+
|
|
260
|
+
# Write multi-matched ORFs to FASTA
|
|
261
|
+
with open(multi_fasta, 'w', encoding='utf-8') as f:
|
|
262
|
+
for key, value in result['pred_metrics'].get('multi_Matched_ORFs', {}).items():
|
|
263
|
+
key_parts = key.split(',')
|
|
264
|
+
multi = f">Predicted_CDS:{key_parts[0]}-{key_parts[1]}_Genes:{'|'.join(value)}"
|
|
265
|
+
f.write(f"{multi}\n")
|
|
266
|
+
|
|
267
|
+
# After all contigs, append the summary table to the main summary file
|
|
268
|
+
if options.outdir and contig_summaries:
|
|
269
|
+
with open(f"{base_out}_summary.txt", 'a', encoding='utf-8') as out_file:
|
|
270
|
+
for row in contig_summaries:
|
|
271
|
+
out_file.write('\t'.join(map(str, row)) + '\n')
|
|
272
|
+
# Optionally, add overall totals
|
|
273
|
+
total_genes = sum(row[1] for row in contig_summaries)
|
|
274
|
+
total_orfs = sum(row[2] for row in contig_summaries)
|
|
275
|
+
total_perfect = sum(row[3] for row in contig_summaries)
|
|
276
|
+
total_partial = sum(row[4] for row in contig_summaries)
|
|
277
|
+
total_missed = sum(row[5] for row in contig_summaries)
|
|
278
|
+
total_unmatched = sum(row[6] for row in contig_summaries)
|
|
279
|
+
total_multi = sum(row[7] for row in contig_summaries)
|
|
280
|
+
out_file.write('\nOverall Summary:\n')
|
|
281
|
+
out_file.write(f'Number of Genes: {total_genes}\n')
|
|
282
|
+
out_file.write(f'Number of ORFs: {total_orfs}\n')
|
|
283
|
+
out_file.write(
|
|
284
|
+
f'Perfect Matches: {total_perfect} [{total_genes}] - {format(100 * total_perfect / total_genes, ".2f")}%\n')
|
|
285
|
+
out_file.write(
|
|
286
|
+
f'Partial Matches: {total_partial} [{total_genes}] - {format(100 * total_partial / total_genes, ".2f")}%\n')
|
|
287
|
+
out_file.write(
|
|
288
|
+
f'Missed Genes: {total_missed} [{total_genes}] - {format(100 * total_missed / total_genes, ".2f")}%\n')
|
|
289
|
+
out_file.write(
|
|
290
|
+
f'Unmatched ORFs: {total_unmatched} [{total_genes}] - {format(100 * total_unmatched / total_genes, ".2f")}%\n')
|
|
291
|
+
out_file.write(
|
|
292
|
+
f'Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%\n')
|
|
293
|
+
|
|
294
|
+
# Calculate combined tool stats - could be optimised further
|
|
295
|
+
combined_tool_stats = {tool: {'perfect': 0, 'partial': 0, 'unmatched': 0, 'multi': 0} for tool in
|
|
296
|
+
options.tools.split(',')}
|
|
297
|
+
for dna_region, result in results.items():
|
|
298
|
+
for tool in options.tools.split(','):
|
|
299
|
+
# perfect
|
|
300
|
+
for key in result['pred_metrics'].get('perfect_Matches', {}):
|
|
301
|
+
if tool in key:
|
|
302
|
+
combined_tool_stats[tool]['perfect'] += 1
|
|
303
|
+
# partial
|
|
304
|
+
for key in result['pred_metrics'].get('partial_Hits', {}):
|
|
305
|
+
if tool in key:
|
|
306
|
+
combined_tool_stats[tool]['partial'] += 1
|
|
307
|
+
# unmatched
|
|
308
|
+
for key in result['pred_metrics'].get('unmatched_ORFs', {}):
|
|
309
|
+
if tool in key:
|
|
310
|
+
combined_tool_stats[tool]['unmatched'] += 1
|
|
311
|
+
# multi
|
|
312
|
+
for key in result['pred_metrics'].get('multi_Matched_ORFs', {}):
|
|
313
|
+
if tool in key:
|
|
314
|
+
combined_tool_stats[tool]['multi'] += 1
|
|
315
|
+
for tool, stats in combined_tool_stats.items():
|
|
316
|
+
out_file.write('\n'+
|
|
317
|
+
f" {tool}: Perfect={stats['perfect']}, Partial={stats['partial']}, Unmatched={stats['unmatched']}, Multi-matched={stats['multi']}\n"
|
|
318
|
+
)
|
|
319
|
+
|
|
320
|
+
# Print combined metrics to stdout
|
|
321
|
+
print("\nCombined metrics for all contigs:")
|
|
322
|
+
print(f'Number of Genes: {total_genes}')
|
|
323
|
+
print(f'Number of ORFs: {total_orfs}')
|
|
324
|
+
print(
|
|
325
|
+
f'Perfect Matches: {total_perfect} [{total_genes}] - {format(100 * total_perfect / total_genes, ".2f")}%')
|
|
326
|
+
print(
|
|
327
|
+
f'Partial Matches: {total_partial} [{total_genes}] - {format(100 * total_partial / total_genes, ".2f")}%')
|
|
328
|
+
print(f'Missed Genes: {total_missed} [{total_genes}] - {format(100 * total_missed / total_genes, ".2f")}%')
|
|
329
|
+
print(
|
|
330
|
+
f'Unmatched ORFs: {total_unmatched} [{total_genes}] - {format(100 * total_unmatched / total_genes, ".2f")}%')
|
|
331
|
+
print(
|
|
332
|
+
f'Multi-matched ORFs: {total_multi} [{total_genes}] - {format(100 * total_multi / total_genes, ".2f")}%')
|
|
333
|
+
|
|
334
|
+
print('Tool breakdown (combined):')
|
|
335
|
+
for tool, stats in combined_tool_stats.items():
|
|
336
|
+
print('\n'+
|
|
337
|
+
f" {tool}: Perfect={stats['perfect']}, Partial={stats['partial']}, Unmatched={stats['unmatched']}, Multi-matched={stats['multi']}"
|
|
338
|
+
)
|
|
339
|
+
|
|
340
|
+
|
|
341
|
+
def _parse_verbose_flag(value):
    """Convert the ``-v`` command-line value into a bool without ``eval``.

    Accepts the spellings that previously passed the ``choices`` check
    (``True``/``False`` and ``1``/``0``), case-insensitively.

    Raises:
        argparse.ArgumentTypeError: if the value is not a recognised boolean,
            so argparse prints a usage error instead of a traceback.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', '1'):
        return True
    if token in ('false', '0'):
        return False
    raise argparse.ArgumentTypeError(f"expected True or False, got {value!r}")


def main():
    """Command-line entry point for Aggregate-Compare.

    Prints the ORForise banner, parses the command-line arguments and passes
    the resulting namespace to ``comparator``.
    """
    print("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
          "Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
          "#####")

    parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Aggregate-Compare Run Parameters.')
    # Drop the last of argparse's built-in argument groups so the custom groups
    # defined below are the ones listed in the help output.
    # NOTE(review): this relies on a private argparse attribute.
    parser._action_groups.pop()

    required = parser.add_argument_group('Required Arguments')
    required.add_argument('-dna', dest='genome_dna', required=True,
                          help='Genome DNA file (.fa) which both annotations are based on')
    required.add_argument('-t', dest='tools', required=True,
                          help='Which tools to analyse? (Prodigal,GeneMarkS)')
    required.add_argument('-tp', dest='tool_predictions', required=True,
                          help='Tool genome prediction file (.gff) - Provide '
                               'file locations for each tool comma separated')
    required.add_argument('-ref', dest='reference_annotation', required=True,
                          help='Which reference annotation file to use as reference?')

    optional = parser.add_argument_group('Optional Arguments')
    optional.add_argument('-gene_ident', action='store', dest='gene_ident', default='CDS',
                          help='What features to consider as genes? - Default: CDS - '
                               'Provide comma separated list of features to consider as genes (e.g. CDS,exon)')
    optional.add_argument('-rt', dest='reference_tool', required=False,
                          help='What type of Annotation to compare to? -- Leave blank for Ensembl reference '
                               '- Provide tool name to compare output from two tools')

    output = parser.add_argument_group('Output')
    output.add_argument('-o', dest='outdir', required=False,
                        help='Define directory where detailed output should be placed - '
                             'If not provided, summary will be printed to std-out')
    output.add_argument('-n', dest='outname', required=False,
                        help='Define output file name - Mandatory if -o is provided: '
                             '<outname>_<contig_id>_ORF_Comparison.csv')

    misc = parser.add_argument_group('Misc')
    # The value used to be converted with eval(), which executes arbitrary
    # expressions typed on the command line; a strict parser is used instead.
    misc.add_argument('-v', dest='verbose', default=False, type=_parse_verbose_flag, choices=[True, False],
                      help='Default - False: Print out runtime status')

    options = parser.parse_args()
    comparator(options)
|
|
378
|
+
|
|
379
|
+
# Script entry point: run the command-line interface only when this file is
# executed directly (not when imported), then report that the run finished.
if __name__ == '__main__':
    main()
    print("Complete")
|
|
382
|
+
|