ORForise 1.6.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +378 -0
- ORForise/Annotation_Compare.py +317 -0
- ORForise/Annotation_Intersector.py +726 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +53 -0
- ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/StORF_Undetected.py +35 -0
- ORForise/Aux/StORF_Undetected/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
- ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +46 -0
- ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
- ORForise/Aux/TabToGFF/__init__.py +0 -0
- ORForise/Aux/__init__.py +0 -0
- ORForise/Comparator.py +882 -0
- ORForise/Convert_To_GFF.py +141 -0
- ORForise/GFF_Adder.py +543 -0
- ORForise/List_Tools.py +56 -0
- ORForise/ORForise_Analysis/__init__.py +0 -0
- ORForise/ORForise_Analysis/cds_checker.py +77 -0
- ORForise/ORForise_Analysis/gene_Lenghts.py +28 -0
- ORForise/ORForise_Analysis/genome_Metrics.py +258 -0
- ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +88 -0
- ORForise/ORForise_Analysis/missed_Gene_Metrics.py +277 -0
- ORForise/ORForise_Analysis/parital_Match_Analysis.py +230 -0
- ORForise/ORForise_Analysis/result_File_Analysis.py +286 -0
- ORForise/ORForise_Analysis/start_Codon_Substitution.py +161 -0
- ORForise/StORForise.py +115 -0
- ORForise/Tools/Augustus/Augustus.py +54 -0
- ORForise/Tools/Augustus/__init__.py +0 -0
- ORForise/Tools/Balrog/Balrog.py +56 -0
- ORForise/Tools/Balrog/__init__.py +0 -0
- ORForise/Tools/EasyGene/EasyGene.py +55 -0
- ORForise/Tools/EasyGene/__init__.py +0 -0
- ORForise/Tools/FGENESB/FGENESB.py +57 -0
- ORForise/Tools/FGENESB/__init__.py +0 -0
- ORForise/Tools/FragGeneScan/FragGeneScan.py +54 -0
- ORForise/Tools/FragGeneScan/__init__.py +0 -0
- ORForise/Tools/GFF/GFF.py +77 -0
- ORForise/Tools/GFF/__init__.py +0 -0
- ORForise/Tools/GLIMMER3/GLIMMER3.py +59 -0
- ORForise/Tools/GLIMMER3/__init__.py +0 -0
- ORForise/Tools/GeneMark/GeneMark.py +135 -0
- ORForise/Tools/GeneMark/__init__.py +0 -0
- ORForise/Tools/GeneMarkHA/GeneMarkHA.py +54 -0
- ORForise/Tools/GeneMarkHA/__init__.py +0 -0
- ORForise/Tools/GeneMarkHMM/GeneMarkHMM.py +55 -0
- ORForise/Tools/GeneMarkHMM/__init__.py +0 -0
- ORForise/Tools/GeneMarkS/GeneMarkS.py +54 -0
- ORForise/Tools/GeneMarkS/__init__.py +0 -0
- ORForise/Tools/GeneMarkS2/GeneMarkS2.py +55 -0
- ORForise/Tools/GeneMarkS2/__init__.py +0 -0
- ORForise/Tools/MetaGene/MetaGene.py +54 -0
- ORForise/Tools/MetaGene/__init__.py +0 -0
- ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +55 -0
- ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
- ORForise/Tools/MetaGeneMark/MetaGeneMark.py +55 -0
- ORForise/Tools/MetaGeneMark/__init__.py +0 -0
- ORForise/Tools/Prodigal/Prodigal.py +55 -0
- ORForise/Tools/Prodigal/__init__.py +0 -0
- ORForise/Tools/Prokka/Prokka.py +57 -0
- ORForise/Tools/Prokka/__init__.py +0 -0
- ORForise/Tools/StORF-Reporter/StORF-Reporter.py +56 -0
- ORForise/Tools/StORF-Reporter/__init__.py +0 -0
- ORForise/Tools/TransDecoder/TransDecoder.py +54 -0
- ORForise/Tools/TransDecoder/__init__.py +0 -0
- ORForise/Tools/__init__.py +0 -0
- ORForise/__init__.py +0 -0
- ORForise/utils.py +236 -0
- orforise-1.6.2.dist-info/METADATA +1038 -0
- orforise-1.6.2.dist-info/RECORD +73 -0
- orforise-1.6.2.dist-info/WHEEL +5 -0
- orforise-1.6.2.dist-info/entry_points.txt +15 -0
- orforise-1.6.2.dist-info/licenses/LICENSE +624 -0
- orforise-1.6.2.dist-info/top_level.txt +1 -0
|
@@ -0,0 +1,141 @@
|
|
|
1
|
+
import argparse
|
|
2
|
+
import logging
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
import os
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
# Prefer package-relative imports; fall back to script-style imports when this
# file is executed outside the installed package.
try:
    from .utils import *
    from .Aux.TabToGFF.TabToGFF import TabToGFF
except ImportError:  # ModuleNotFoundError is a subclass, so it is covered too
    from utils import *
    # Import the TabToGFF *function* from the TabToGFF module, exactly as the
    # package branch does. Importing the name from the Aux.TabToGFF package
    # would bind the submodule instead, and main() calls TabToGFF(...).
    from ORForise.src.ORForise.Aux.TabToGFF.TabToGFF import TabToGFF
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def setup_logging(outdir, verbose=False):
    """Configure the root logger for a Convert_To_GFF run.

    Handlers already attached to the root logger are detached and closed, so
    calling this function repeatedly neither duplicates output nor leaks open
    logfile handles. A stdout handler is always installed; a timestamped
    logfile is only created when *verbose* is true.

    Args:
        outdir: Directory in which the logfile is created (expected to exist
            already; it is only used when *verbose* is true).
        verbose: When true, log at DEBUG level and also write a logfile.
            Otherwise log at INFO level to stdout only.

    Returns:
        The path of the logfile when *verbose* is true, otherwise ``None``.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logger = logging.getLogger()
    logger.setLevel(level)

    # Detach AND close the previous handlers. Overwriting ``logger.handlers``
    # alone would leave earlier FileHandlers holding their files open.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # Only create a file handler (and thus the logfile) when verbose is enabled.
    logfile = None
    if verbose:
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        logfile = os.path.join(outdir, f'convert_to_gff_{timestamp}.log')
        file_handler = logging.FileHandler(logfile)
        file_handler.setLevel(logging.DEBUG)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    # Always add a stdout handler.
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setLevel(level)
    stdout_handler.setFormatter(formatter)
    logger.addHandler(stdout_handler)
    return logfile
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
# Characters that may not appear literally inside a GFF3 column-9 attribute
# value; each is replaced by its percent-encoded form.
_GFF3_ATTRIBUTE_ESCAPES = str.maketrans({
    '%': '%25',
    ';': '%3B',
    '=': '%3D',
    '&': '%26',
    ',': '%2C',
    '\t': '%09',
    '\n': '%0A',
    '\r': '%0D',
})


def _gff3_escape(value):
    """Return *value* as text that is safe inside a GFF3 attribute value."""
    return str(value).translate(_GFF3_ATTRIBUTE_ESCAPES)


def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
    """Write converted annotation features to a GFF file.

    The header records the tool name, the run date, the genome DNA path (only
    when one was given) and the original annotation path. One CDS record is
    then written per feature. Only the ``abricate`` format is supported; for
    any other *fmt* a warning is logged and only the header is written.

    Attribute values are percent-encoded so that characters such as ``;``,
    ``=`` or ``,`` inside a value cannot break the column-9 attribute list.

    Args:
        outpath: Path of the GFF file to create (overwritten if present).
        genome_ID: Genome identifier; accepted for interface compatibility
            and not used in the output.
        genome_DNA: Path of the genome FASTA, or a falsy value to omit the
            corresponding header line.
        input_annotation: Path of the original annotation file.
        fmt: Input format name; also written as the GFF source column.
        features: Mapping of ``"start,...,stop"`` position strings to dicts
            holding ``seqid``, ``strand``, ``accession``, ``database``,
            ``identity``, ``coverage``, ``product`` and ``resistance``.
    """
    with open(outpath, 'w') as out:
        out.write('##gff-version\t3\n')
        out.write('#\tConvert_To_GFF\n')
        out.write('#\tRun Date: ' + str(datetime.now()) + '\n')
        # Only include genome DNA line if a path was provided
        if genome_DNA:
            out.write('##Genome DNA File:' + genome_DNA + '\n')
        out.write('##Original File: ' + input_annotation + '\n')

        if fmt != 'abricate':  # Currently only supports abricate format
            logging.warning(
                "write_gff does not support format '%s'; no features written to %s",
                fmt, outpath)
            return

        for pos, data in features.items():
            coords = pos.split(',')
            start, stop = coords[0], coords[-1]
            info = (
                'abricate_anotation'
                ';accession=' + _gff3_escape(data['accession'])
                + ';database=' + _gff3_escape(data['database'])
                + ';identity=' + _gff3_escape(data['identity'])
                + ';coverage=' + _gff3_escape(data['coverage'])
                + ';product=' + _gff3_escape(data['product'])
                + ';resistance=' + _gff3_escape(data['resistance'])
            )
            out.write(
                f"{data['seqid']}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{data['strand']}\t.\tID={info}\n"
            )
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def load_genome(genome_fasta):
    """Read a FASTA file and return its identifier and sequence.

    Header lines (starting with ``>``) set the identifier to their first
    whitespace-delimited token with the leading ``>`` removed. All other
    non-blank lines are concatenated into one sequence. If the file holds
    several records, their sequences are joined and the identifier of the
    last header is returned.

    Surrounding whitespace is stripped from every line, so trailing spaces
    and whitespace-only lines never end up inside the sequence.

    Args:
        genome_fasta: Path of the FASTA file to read.

    Returns:
        A ``(genome_ID, genome_seq)`` tuple. ``genome_ID`` is ``'unknown'``
        when the file has no header line, and ``genome_seq`` is ``''`` when
        it has no sequence lines.
    """
    genome_ID = 'unknown'
    chunks = []  # collected and joined once, instead of repeated str +=
    with open(genome_fasta, 'r') as fh:
        for raw_line in fh:
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                genome_ID = line.split()[0].lstrip('>')
            else:
                chunks.append(line)
    return genome_ID, ''.join(chunks)
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
def main():
    """Command-line entry point: convert a tabular annotation file to GFF.

    Parses the command line, creates the output directory, configures
    logging, optionally loads the genome FASTA, parses the annotation with
    ``TabToGFF`` and writes ``<input basename>.gff`` into the output
    directory. Exits with status 1 when the genome file is missing or the
    annotation cannot be parsed.
    """
    print(WELCOME)

    parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Convert-To-GFF Run Parameters')
    # Drop the last default argparse group so only the groups defined below
    # are listed in --help.
    parser._action_groups.pop()

    required = parser.add_argument_group('Required Arguments')
    required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
    required.add_argument('-fmt', dest='format', required=True, help='Input format: blast, abricate, genemark')
    required.add_argument('-o', dest='output_dir', required=True, help='Output directory')

    optional = parser.add_argument_group('Optional Arguments')
    # Genome DNA is optional, so it belongs in the optional group; without it
    # the conversion runs with an empty genome sequence.
    optional.add_argument('-dna', dest='genome_DNA', required=False, help='Genome DNA file (.fa)')
    optional.add_argument('-gi', dest='gene_ident', default='CDS', required=False, help='Gene identifier types to extract (unused)')
    optional.add_argument('--verbose', dest='verbose', action='store_true', help='Verbose logging with logfile')

    options = parser.parse_args()

    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(options.output_dir, exist_ok=True)
    logfile = setup_logging(options.output_dir, verbose=options.verbose)
    logging.info('Starting Convert_To_GFF')
    # Log genome DNA only if provided
    if options.genome_DNA:
        logging.info('Genome DNA: %s', options.genome_DNA)
    else:
        logging.info('Genome DNA: (not provided)')
    logging.info('Input annotation: %s', options.input_annotation)
    logging.info('Format: %s', options.format)

    # If a genome fasta was provided, load it; otherwise proceed without genome sequence
    if options.genome_DNA:
        if not os.path.exists(options.genome_DNA):
            logging.error('Genome DNA file does not exist: %s', options.genome_DNA)
            sys.exit(1)
        genome_ID, genome_seq = load_genome(options.genome_DNA)
    else:
        # Derive a sensible genome_ID from the annotation filename and leave sequence empty
        genome_ID = os.path.splitext(os.path.basename(options.input_annotation))[0]
        genome_seq = ''

    try:
        # Build genome map expected by TabToGFF: mapping genome_ID -> tuple(sequence, ...)
        genome_map = {genome_ID: (genome_seq,)}
        features = TabToGFF(options.input_annotation, genome_map, options.gene_ident, fmt=options.format)
    except Exception:
        # Top-level boundary: log the traceback and stop with a failure status.
        logging.exception('Error parsing input annotation')
        sys.exit(1)

    # Features are deliberately left unsorted to preserve the original order.
    # splitext (also used for genome_ID above) keeps the name of dotfile
    # inputs instead of reducing the output name to a bare '.gff'.
    basename = os.path.basename(options.input_annotation)
    outname = os.path.splitext(basename)[0] + '.gff'
    outgff = os.path.join(options.output_dir, outname)
    # Pass the original genome path if provided, else pass None so headers adapt
    genome_DNA_path = options.genome_DNA or None
    write_gff(outgff, genome_ID, genome_DNA_path, options.input_annotation, options.format, features)
    logging.info('Wrote GFF to %s', outgff)
    # A logfile only exists in verbose mode; avoid logging "Logfile: None".
    if logfile:
        logging.info('Logfile: %s', logfile)


# Run the converter only when this file is executed as a script.
if __name__ == '__main__':
    main()
|