ORForise 1.6.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (73):
  1. ORForise/Aggregate_Compare.py +378 -0
  2. ORForise/Annotation_Compare.py +317 -0
  3. ORForise/Annotation_Intersector.py +726 -0
  4. ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +53 -0
  5. ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
  6. ORForise/Aux/StORF_Undetected/StORF_Undetected.py +35 -0
  7. ORForise/Aux/StORF_Undetected/__init__.py +0 -0
  8. ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
  9. ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +46 -0
  10. ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
  11. ORForise/Aux/TabToGFF/__init__.py +0 -0
  12. ORForise/Aux/__init__.py +0 -0
  13. ORForise/Comparator.py +882 -0
  14. ORForise/Convert_To_GFF.py +141 -0
  15. ORForise/GFF_Adder.py +543 -0
  16. ORForise/List_Tools.py +56 -0
  17. ORForise/ORForise_Analysis/__init__.py +0 -0
  18. ORForise/ORForise_Analysis/cds_checker.py +77 -0
  19. ORForise/ORForise_Analysis/gene_Lenghts.py +28 -0
  20. ORForise/ORForise_Analysis/genome_Metrics.py +258 -0
  21. ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +88 -0
  22. ORForise/ORForise_Analysis/missed_Gene_Metrics.py +277 -0
  23. ORForise/ORForise_Analysis/parital_Match_Analysis.py +230 -0
  24. ORForise/ORForise_Analysis/result_File_Analysis.py +286 -0
  25. ORForise/ORForise_Analysis/start_Codon_Substitution.py +161 -0
  26. ORForise/StORForise.py +115 -0
  27. ORForise/Tools/Augustus/Augustus.py +54 -0
  28. ORForise/Tools/Augustus/__init__.py +0 -0
  29. ORForise/Tools/Balrog/Balrog.py +56 -0
  30. ORForise/Tools/Balrog/__init__.py +0 -0
  31. ORForise/Tools/EasyGene/EasyGene.py +55 -0
  32. ORForise/Tools/EasyGene/__init__.py +0 -0
  33. ORForise/Tools/FGENESB/FGENESB.py +57 -0
  34. ORForise/Tools/FGENESB/__init__.py +0 -0
  35. ORForise/Tools/FragGeneScan/FragGeneScan.py +54 -0
  36. ORForise/Tools/FragGeneScan/__init__.py +0 -0
  37. ORForise/Tools/GFF/GFF.py +77 -0
  38. ORForise/Tools/GFF/__init__.py +0 -0
  39. ORForise/Tools/GLIMMER3/GLIMMER3.py +59 -0
  40. ORForise/Tools/GLIMMER3/__init__.py +0 -0
  41. ORForise/Tools/GeneMark/GeneMark.py +135 -0
  42. ORForise/Tools/GeneMark/__init__.py +0 -0
  43. ORForise/Tools/GeneMarkHA/GeneMarkHA.py +54 -0
  44. ORForise/Tools/GeneMarkHA/__init__.py +0 -0
  45. ORForise/Tools/GeneMarkHMM/GeneMarkHMM.py +55 -0
  46. ORForise/Tools/GeneMarkHMM/__init__.py +0 -0
  47. ORForise/Tools/GeneMarkS/GeneMarkS.py +54 -0
  48. ORForise/Tools/GeneMarkS/__init__.py +0 -0
  49. ORForise/Tools/GeneMarkS2/GeneMarkS2.py +55 -0
  50. ORForise/Tools/GeneMarkS2/__init__.py +0 -0
  51. ORForise/Tools/MetaGene/MetaGene.py +54 -0
  52. ORForise/Tools/MetaGene/__init__.py +0 -0
  53. ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +55 -0
  54. ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
  55. ORForise/Tools/MetaGeneMark/MetaGeneMark.py +55 -0
  56. ORForise/Tools/MetaGeneMark/__init__.py +0 -0
  57. ORForise/Tools/Prodigal/Prodigal.py +55 -0
  58. ORForise/Tools/Prodigal/__init__.py +0 -0
  59. ORForise/Tools/Prokka/Prokka.py +57 -0
  60. ORForise/Tools/Prokka/__init__.py +0 -0
  61. ORForise/Tools/StORF-Reporter/StORF-Reporter.py +56 -0
  62. ORForise/Tools/StORF-Reporter/__init__.py +0 -0
  63. ORForise/Tools/TransDecoder/TransDecoder.py +54 -0
  64. ORForise/Tools/TransDecoder/__init__.py +0 -0
  65. ORForise/Tools/__init__.py +0 -0
  66. ORForise/__init__.py +0 -0
  67. ORForise/utils.py +236 -0
  68. orforise-1.6.2.dist-info/METADATA +1038 -0
  69. orforise-1.6.2.dist-info/RECORD +73 -0
  70. orforise-1.6.2.dist-info/WHEEL +5 -0
  71. orforise-1.6.2.dist-info/entry_points.txt +15 -0
  72. orforise-1.6.2.dist-info/licenses/LICENSE +624 -0
  73. orforise-1.6.2.dist-info/top_level.txt +1 -0
@@ -0,0 +1,141 @@
1
+ import argparse
2
+ import logging
3
+ from datetime import datetime
4
+ import os
5
+ import sys
6
+
7
+ try:
8
+ from .utils import *
9
+ from .Aux.TabToGFF.TabToGFF import TabToGFF
10
+ except (ImportError, ModuleNotFoundError):
11
+ from utils import *
12
+ from ORForise.src.ORForise.Aux.TabToGFF import TabToGFF
13
+
14
+
15
+
16
+
17
def setup_logging(outdir, verbose=False):
    """Configure the root logger for a Convert_To_GFF run.

    Handlers already attached to the root logger are detached *and closed*
    first, so calling this function repeatedly neither duplicates output nor
    leaks the open file descriptor of a previous run's logfile.

    Args:
        outdir: Directory in which the logfile is created (verbose mode
            only). It must already exist.
        verbose: When true, log at DEBUG level and additionally write a
            timestamped ``convert_to_gff_<YYYYmmdd_HHMMSS>.log`` file in
            *outdir*. When false, log at INFO level to stdout only.

    Returns:
        The path of the logfile, or ``None`` when *verbose* is false.
    """
    level = logging.DEBUG if verbose else logging.INFO
    logger = logging.getLogger()
    logger.setLevel(level)

    # Iterate over a copy: removeHandler() mutates logger.handlers.
    for handler in list(logger.handlers):
        logger.removeHandler(handler)
        handler.close()

    fmt = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')

    # Only create a file handler (and thus the logfile) when verbose is enabled.
    logfile = None
    if verbose:
        ts = datetime.now().strftime('%Y%m%d_%H%M%S')
        logfile = os.path.join(outdir, f'convert_to_gff_{ts}.log')
        fh = logging.FileHandler(logfile)
        fh.setLevel(logging.DEBUG)
        fh.setFormatter(fmt)
        logger.addHandler(fh)

    # Always add a stdout handler.
    sh = logging.StreamHandler(sys.stdout)
    sh.setLevel(level)
    sh.setFormatter(fmt)
    logger.addHandler(sh)
    return logfile
38
+
39
+
40
def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
    """Write converted annotation features to a GFF file.

    Args:
        outpath: Path of the GFF file to create (overwritten if it exists).
        genome_ID: Genome identifier. Not used when writing; kept so that
            existing callers continue to work.
        genome_DNA: Path of the genome FASTA, recorded in the header. When
            falsy the ``##Genome DNA File`` header line is omitted.
        input_annotation: Path of the original annotation file, recorded in
            the header.
        fmt: Input format name; also written as the GFF source column.
            Only ``'abricate'`` is currently supported.
        features: Mapping whose keys are comma-separated positions (first
            field = start, last field = stop) and whose values are dicts
            holding ``seqid`` and ``strand`` plus, for abricate,
            ``accession``, ``database``, ``identity``, ``coverage``,
            ``product`` and ``resistance``.

    Raises:
        ValueError: If *features* is non-empty and *fmt* is not supported.
            Raised before the output file is opened, so no partial file is
            left behind.
    """
    supported_formats = ('abricate',)
    # Previously an unsupported format surfaced as a NameError on the first
    # feature, after the output file had already been truncated.
    if features and fmt not in supported_formats:
        raise ValueError(
            f"Unsupported format {fmt!r}: write_gff currently only supports "
            + ', '.join(repr(name) for name in supported_formats)
        )

    with open(outpath, 'w') as out:
        out.write('##gff-version\t3\n')
        out.write('#\tConvert_To_GFF\n')
        out.write('#\tRun Date: ' + str(datetime.now()) + '\n')
        # Only include the genome DNA line if a path was provided.
        if genome_DNA:
            out.write('##Genome DNA File:' + genome_DNA + '\n')
        out.write('##Original File: ' + input_annotation + '\n')

        for pos, data in features.items():
            coords = pos.split(',')
            start, stop = coords[0], coords[-1]
            # f-strings stringify every value, so numeric or otherwise
            # non-str attribute values no longer raise TypeError.
            attributes = (
                'ID=abricate_anotation'
                f";accession={data['accession']}"
                f";database={data['database']}"
                f";identity={data['identity']}"
                f";coverage={data['coverage']}"
                f";product={data['product']}"
                f";resistance={data['resistance']}"
            )
            out.write(
                f"{data['seqid']}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{data['strand']}\t.\t{attributes}\n"
            )
58
+
59
+
60
def load_genome(genome_fasta):
    """Read a FASTA file and return its identifier and sequence.

    Every non-header line is appended to a single sequence string. If the
    file contains several header lines, the identifier from the last one is
    the one returned.

    Args:
        genome_fasta: Path of the FASTA file to read.

    Returns:
        A ``(genome_ID, genome_seq)`` tuple. ``genome_ID`` is the first
        whitespace-delimited token of the header with the leading ``>``
        removed, or ``'unknown'`` when the file has no header line.
    """
    genome_ID = 'unknown'
    chunks = []
    with open(genome_fasta, 'r') as fh:
        for raw_line in fh:
            # Strip all surrounding whitespace, not just the newline, so that
            # trailing blanks and whitespace-only lines never end up inside
            # the sequence.
            line = raw_line.strip()
            if not line:
                continue
            if line.startswith('>'):
                genome_ID = line.split()[0].lstrip('>')
            else:
                chunks.append(line)
    # Join once instead of growing a string inside the loop.
    return genome_ID, ''.join(chunks)
73
+
74
+
75
def main():
    """Command-line entry point: convert a tabular annotation file to GFF.

    Parses the command line, configures logging, optionally loads the genome
    FASTA, parses the annotation with ``TabToGFF`` and writes the result to
    ``<output_dir>/<annotation name without extension>.gff``.

    The process exits with status 1 when the genome file is missing, when the
    annotation cannot be parsed, or when the GFF file cannot be written.
    """
    print(WELCOME)

    parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Convert-To-GFF Run Parameters')
    # Drop the last of argparse's built-in argument groups so the help text
    # is organised under the two custom groups defined below.
    parser._action_groups.pop()

    required = parser.add_argument_group('Required Arguments')
    required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
    required.add_argument('-fmt', dest='format', required=True, help='Input format: blast, abricate, genemark')
    required.add_argument('-o', dest='output_dir', required=True, help='Output directory')

    optional = parser.add_argument_group('Optional Arguments')
    # Genome DNA is optional: without it we operate without a genome sequence.
    # It is therefore listed under the optional group in --help.
    optional.add_argument('-dna', dest='genome_DNA', required=False, help='Genome DNA file (.fa)')
    optional.add_argument('-gi', dest='gene_ident', default='CDS', required=False, help='Gene identifier types to extract (unused)')
    optional.add_argument('--verbose', dest='verbose', action='store_true', help='Verbose logging with logfile')

    options = parser.parse_args()

    # exist_ok avoids the race between checking for and creating the directory.
    os.makedirs(options.output_dir, exist_ok=True)
    logfile = setup_logging(options.output_dir, verbose=options.verbose)

    logging.info('Starting Convert_To_GFF')
    if options.genome_DNA:
        logging.info('Genome DNA: %s', options.genome_DNA)
    else:
        logging.info('Genome DNA: (not provided)')
    logging.info('Input annotation: %s', options.input_annotation)
    logging.info('Format: %s', options.format)

    # Annotation file name without its final extension; used for the fallback
    # genome ID and for the output file name.
    annotation_stem = os.path.splitext(os.path.basename(options.input_annotation))[0]

    # If a genome fasta was provided, load it; otherwise proceed without a
    # genome sequence and derive the genome ID from the annotation file name.
    if options.genome_DNA:
        if not os.path.exists(options.genome_DNA):
            logging.error('Genome DNA file does not exist: %s', options.genome_DNA)
            sys.exit(1)
        genome_ID, genome_seq = load_genome(options.genome_DNA)
    else:
        genome_ID = annotation_stem
        genome_seq = ''

    # Genome map as passed to TabToGFF: genome_ID -> tuple(sequence, ...)
    genome_map = {genome_ID: (genome_seq,)}
    try:
        features = TabToGFF(options.input_annotation, genome_map, options.gene_ident, fmt=options.format)
    except Exception:
        # Top-level boundary: log the full traceback and stop with a
        # non-zero exit status.
        logging.exception('Error parsing input annotation')
        sys.exit(1)

    # Features are deliberately not sorted, to preserve the original order.
    outgff = os.path.join(options.output_dir, annotation_stem + '.gff')

    # Pass the original genome path if provided, else None so headers adapt.
    genome_DNA_path = options.genome_DNA or None
    try:
        write_gff(outgff, genome_ID, genome_DNA_path, options.input_annotation, options.format, features)
    except Exception:
        # Mirror the parse-failure handling instead of ending in a raw traceback.
        logging.exception('Error writing GFF output')
        sys.exit(1)

    logging.info('Wrote GFF to %s', outgff)
    # A logfile only exists in verbose mode; avoid logging "Logfile: None".
    if logfile:
        logging.info('Logfile: %s', logfile)
139
+
140
# Script entry point: run the command-line interface only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    main()