ORForise 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Convert_To_GFF.py +34 -7
- ORForise/{Aux → Tools}/TabToGFF/TabToGFF.py +72 -0
- ORForise/utils.py +1 -1
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/METADATA +7 -7
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/RECORD +10 -10
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/WHEEL +1 -1
- /ORForise/{Aux → Tools}/TabToGFF/__init__.py +0 -0
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/entry_points.txt +0 -0
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/licenses/LICENSE +0 -0
- {orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/top_level.txt +0 -0
ORForise/Convert_To_GFF.py
CHANGED
|
@@ -4,13 +4,13 @@ from datetime import datetime
|
|
|
4
4
|
import os
|
|
5
5
|
import sys
|
|
6
6
|
|
|
7
|
+
|
|
7
8
|
try:
|
|
8
9
|
from .utils import *
|
|
9
|
-
from .
|
|
10
|
+
from .Tools.TabToGFF.TabToGFF import TabToGFF
|
|
10
11
|
except (ImportError, ModuleNotFoundError):
|
|
11
12
|
from utils import *
|
|
12
|
-
from
|
|
13
|
-
|
|
13
|
+
from Tools.TabToGFF.TabToGFF import TabToGFF
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
|
|
@@ -50,10 +50,37 @@ def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
|
|
|
50
50
|
pos_ = pos.split(',')
|
|
51
51
|
start = pos_[0]
|
|
52
52
|
stop = pos_[-1]
|
|
53
|
-
strand = data
|
|
53
|
+
strand = data.get('strand', '.')
|
|
54
54
|
if fmt == 'abricate': # Currently only supports abricate format
|
|
55
|
-
info = '
|
|
56
|
-
|
|
55
|
+
info = 'abricate_annotation;accession={};database={};identity={};coverage={};product={};resistance={}'.format(
|
|
56
|
+
data.get('accession', 'unknown'),
|
|
57
|
+
data.get('database', 'unknown'),
|
|
58
|
+
data.get('identity', ''),
|
|
59
|
+
data.get('coverage', ''),
|
|
60
|
+
data.get('product', ''),
|
|
61
|
+
data.get('resistance', '')
|
|
62
|
+
)
|
|
63
|
+
elif fmt in ('amrfinder', 'amrfinderplus', 'amr'):
|
|
64
|
+
# Build a compact attribute string for amrfinder-plus output
|
|
65
|
+
info = ('amrfinder_annotation;element={};element_name={};protein_id={};type={};class={};subclass={};method={};pct_cov={};pct_id={};closest_acc={};closest_name={}').format(
|
|
66
|
+
data.get('element_symbol', ''),
|
|
67
|
+
data.get('element_name', ''),
|
|
68
|
+
data.get('protein_id', ''),
|
|
69
|
+
data.get('type', ''),
|
|
70
|
+
data.get('class', ''),
|
|
71
|
+
data.get('subclass', ''),
|
|
72
|
+
data.get('method', ''),
|
|
73
|
+
data.get('pct_coverage', ''),
|
|
74
|
+
data.get('pct_identity', ''),
|
|
75
|
+
data.get('closest_accession', ''),
|
|
76
|
+
data.get('closest_name', '')
|
|
77
|
+
)
|
|
78
|
+
else:
|
|
79
|
+
# Generic fallback: try to include any seqid/gene info if present
|
|
80
|
+
gene_id = data.get('gene') or data.get('ID') or ''
|
|
81
|
+
info = f"annotation;id={gene_id}"
|
|
82
|
+
|
|
83
|
+
entry = f"{data.get('seqid', genome_ID)}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{strand}\t.\tID={info}\n"
|
|
57
84
|
out.write(entry)
|
|
58
85
|
|
|
59
86
|
|
|
@@ -79,7 +106,7 @@ def main():
|
|
|
79
106
|
required = parser.add_argument_group('Required Arguments')
|
|
80
107
|
|
|
81
108
|
required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
|
|
82
|
-
required.add_argument('-fmt', dest='format', required=True, help='Input format:
|
|
109
|
+
required.add_argument('-fmt', dest='format', required=True, help='Input format: amrfinder, abricate, blast')
|
|
83
110
|
required.add_argument('-o', dest='output_dir', required=True, help='Output directory')
|
|
84
111
|
|
|
85
112
|
optional = parser.add_argument_group('Optional Arguments')
|
|
@@ -128,6 +128,76 @@ def parse_genemark(path, genome_seq, gene_ident=None):
|
|
|
128
128
|
return results
|
|
129
129
|
|
|
130
130
|
|
|
131
|
+
def parse_amrfinderplus(path, genome_seq, gene_ident=None):
    """
    Parse an AMRFinderPlus TSV report into an ordered mapping of hits.

    The first non-empty line that does not start with '#' is taken as the
    header; columns are then located by header name, so column order does
    not matter and optional columns may be absent.

    Args:
        path: Path of the tab-separated AMRFinderPlus report.
        genome_seq: Not used by this parser; kept so the signature matches
            the other parse_* functions dispatched by TabToGFF.
        gene_ident: Not used by this parser; kept for the same reason.

    Returns:
        collections.OrderedDict keyed by "start,stop" (in file order). Each
        value is a dict with the keys: seqid, start, end, strand, protein_id,
        element_symbol, element_name, type, subtype, class, subclass, method,
        pct_coverage, pct_identity, closest_accession, closest_name.
        'start' and 'end' are ints; every other value is the raw string from
        the report. A column missing from the header yields '' ('.' for
        strand, the GFF placeholder for an unknown strand). Two rows with
        the same start/stop overwrite each other; the last one wins.

    Rows with fewer fields than the header, or without an integer
    Start/Stop, are skipped with a logged warning rather than raising.
    """
    # attrs key -> header names accepted for that field, preferred name first.
    # The second name in each tuple is the label believed to be used by older
    # AMRFinderPlus releases -- NOTE(review): confirm against the tool's docs.
    accepted_names = (
        ('seqid', ('Contig id',)),
        ('strand', ('Strand',)),
        ('protein_id', ('Protein id', 'Protein identifier')),
        ('element_symbol', ('Element symbol', 'Gene symbol')),
        ('element_name', ('Element name', 'Sequence name')),
        ('type', ('Type', 'Element type')),
        ('subtype', ('Subtype', 'Element subtype')),
        ('class', ('Class',)),
        ('subclass', ('Subclass',)),
        ('method', ('Method',)),
        ('pct_coverage', ('% Coverage of reference',
                          '% Coverage of reference sequence')),
        ('pct_identity', ('% Identity to reference',
                          '% Identity to reference sequence')),
        ('closest_accession', ('Closest reference accession',
                               'Accession of closest sequence')),
        ('closest_name', ('Closest reference name',
                          'Name of closest sequence')),
    )

    results = collections.OrderedDict()
    with open(path, 'r') as fh:
        header = None
        header_map = {}
        column_index = {}
        for i, line in enumerate(fh, 1):
            line = line.rstrip('\n')
            # Blank lines and '#' comments never count as header or data.
            if not line or line.startswith('#'):
                continue
            if header is None:
                header = line.split('\t')
                header_map = {h.strip(): idx for idx, h in enumerate(header)}
                # Resolve every optional column once instead of per row;
                # None marks a column that this report does not provide.
                for key, names in accepted_names:
                    column_index[key] = next(
                        (header_map[name] for name in names if name in header_map),
                        None,
                    )
                continue

            parts = line.split('\t')
            # Short rows cannot be indexed safely by header position.
            if len(parts) < len(header):
                logging.warning("Line %d: unexpected number of columns in amrfinder line", i)
                continue
            try:
                # A missing Start/Stop column gives index None -> TypeError;
                # a non-numeric cell gives ValueError.
                start = int(parts[header_map.get('Start')])
                end = int(parts[header_map.get('Stop')])
            except (TypeError, ValueError, IndexError):
                logging.warning("Line %d: invalid Start/Stop in amrfinder line", i)
                continue

            attrs = {
                'seqid': _amrfinder_cell(parts, column_index['seqid']),
                'start': start,
                'end': end,
                'strand': _amrfinder_cell(parts, column_index['strand'], '.'),
            }
            for key, _names in accepted_names[2:]:
                attrs[key] = _amrfinder_cell(parts, column_index[key])
            results[f"{start},{end}"] = attrs
    return results


def _amrfinder_cell(parts, index, default=''):
    """Return parts[index], or `default` when the column is absent (index is None)."""
    if index is None:
        return default
    return parts[index]
|
|
199
|
+
|
|
200
|
+
|
|
131
201
|
def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
|
|
132
202
|
# Should be cleaned up to use consistent format names
|
|
133
203
|
fmt = fmt.lower()
|
|
@@ -137,4 +207,6 @@ def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
|
|
|
137
207
|
return parse_abricate(input_file, genome_seq, gene_ident)
|
|
138
208
|
if fmt in ('genemark', 'gene_mark'):
|
|
139
209
|
return parse_genemark(input_file, genome_seq, gene_ident)
|
|
210
|
+
if fmt in ('amrfinder', 'amrfinderplus', 'amr'):
|
|
211
|
+
return parse_amrfinderplus(input_file, genome_seq, gene_ident)
|
|
140
212
|
raise ValueError(f"Unknown format: {fmt}")
|
ORForise/utils.py
CHANGED
|
@@ -4,7 +4,7 @@ import collections
|
|
|
4
4
|
# Constants
|
|
5
5
|
SHORT_ORF_LENGTH = 300
|
|
6
6
|
MIN_COVERAGE = 75
|
|
7
|
-
ORForise_Version = 'v1.6.
|
|
7
|
+
ORForise_Version = 'v1.6.5'
|
|
8
8
|
CLOSING=("\n####\nThank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
|
|
9
9
|
"Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
|
|
10
10
|
"#####")
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.6.
|
|
3
|
+
Version: 1.6.5
|
|
4
4
|
Summary: ORForise - A platform for analysing and comparing genome annotations.
|
|
5
5
|
Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
|
|
6
6
|
License: GNU GENERAL PUBLIC LICENSE
|
|
@@ -662,7 +662,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
|
|
|
662
662
|
For Help: ```Annotation-Compare -h ```
|
|
663
663
|
|
|
664
664
|
```python
|
|
665
|
-
ORForise v1.6.
|
|
665
|
+
ORForise v1.6.5: Annotation-Compare Run Parameters.
|
|
666
666
|
|
|
667
667
|
Required Arguments:
|
|
668
668
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -730,7 +730,7 @@ ORForise can be used as the example below.
|
|
|
730
730
|
For Help: ```Aggregate-Compare -h ```
|
|
731
731
|
|
|
732
732
|
```python
|
|
733
|
-
ORForise v1.6.
|
|
733
|
+
ORForise v1.6.5: Aggregate-Compare Run Parameters.
|
|
734
734
|
|
|
735
735
|
Required Arguments:
|
|
736
736
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -811,7 +811,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
|
|
|
811
811
|
For Help: ```GFF-Adder -h ```
|
|
812
812
|
|
|
813
813
|
```python
|
|
814
|
-
ORForise v1.6.
|
|
814
|
+
ORForise v1.6.5: GFF-Adder Run Parameters.
|
|
815
815
|
|
|
816
816
|
Required Arguments:
|
|
817
817
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -868,7 +868,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
|
|
|
868
868
|
[-cov COVERAGE] [--report-discordance]
|
|
869
869
|
[--report-discordance-file REPORT_DISCORDANCE_FILE]
|
|
870
870
|
|
|
871
|
-
ORForise v1.6.
|
|
871
|
+
ORForise v1.6.5: Annotation-Intersector Run Parameters
|
|
872
872
|
|
|
873
873
|
options:
|
|
874
874
|
-h, --help show this help message and exit
|
|
@@ -939,7 +939,7 @@ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_c
|
|
|
939
939
|
|
|
940
940
|
```
|
|
941
941
|
|
|
942
|
-
#### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
|
|
942
|
+
#### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
|
|
943
943
|
For Help: ```Convert_To_GFF.py -h ```
|
|
944
944
|
```
|
|
945
945
|
Thank you for using ORForise
|
|
@@ -947,7 +947,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
947
947
|
#####
|
|
948
948
|
usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
|
|
949
949
|
|
|
950
|
-
ORForise v1.6.
|
|
950
|
+
ORForise v1.6.5: Convert-To-GFF Run Parameters
|
|
951
951
|
|
|
952
952
|
Required Arguments:
|
|
953
953
|
-dna GENOME_DNA Genome DNA file (.fa)
|
|
@@ -2,12 +2,12 @@ ORForise/Aggregate_Compare.py,sha256=AzGOfuQLt4haw4rdCwIEag5Y7hnXHLLApkTa6_j99-A
|
|
|
2
2
|
ORForise/Annotation_Compare.py,sha256=7_LwWKDKZHBrhUWODxTJgd-tppaA4k5IvNuX4bU8_2Q,18571
|
|
3
3
|
ORForise/Annotation_Intersector.py,sha256=7VH7iHk4m1c08AeKf9vGEYuAecsywfC4AQHUlIbgQKQ,35856
|
|
4
4
|
ORForise/Comparator.py,sha256=59VfUS8d19Xa83o1AsCuowDhhe-iNr5wO4FutDpoQRs,48078
|
|
5
|
-
ORForise/Convert_To_GFF.py,sha256=
|
|
5
|
+
ORForise/Convert_To_GFF.py,sha256=N8yKhcbmtYOH3KBZFf1u2BhWOwnuVTU864RJiextnIk,7332
|
|
6
6
|
ORForise/GFF_Adder.py,sha256=PuOZl4TUN9SbMjGhkuF92UDePAnx0NdVAuWFRxR61XA,28670
|
|
7
7
|
ORForise/List_Tools.py,sha256=OZadIWAP0HJ_JYlTDqWw_EA8Mkew-26_cKOkRE4i7ro,1618
|
|
8
8
|
ORForise/StORForise.py,sha256=yRZtKXKcmevxZ_2asesYdkl-qen3MmOn9_r0vb0927I,5772
|
|
9
9
|
ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
|
-
ORForise/utils.py,sha256=
|
|
10
|
+
ORForise/utils.py,sha256=H7zOrQXn7PWzCaqOzRHyP4eG9Y54KnmhKmp9DCP6Ik0,15759
|
|
11
11
|
ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
12
|
ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
|
|
13
13
|
ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -15,8 +15,6 @@ ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha2
|
|
|
15
15
|
ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
16
|
ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
17
|
ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
|
|
18
|
-
ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
|
|
19
|
-
ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
20
18
|
ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
19
|
ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
|
|
22
20
|
ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
|
|
@@ -63,11 +61,13 @@ ORForise/Tools/Prokka/Prokka.py,sha256=Kcl1ocVj6hPOfEEwf8bBAWhzWX_XAe55kwNUeM8EU
|
|
|
63
61
|
ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
62
|
ORForise/Tools/StORF-Reporter/StORF-Reporter.py,sha256=BQpFfpXtcNC4C_P4Bk5IZZ9__Xy2VNcbh7zzSDnrNOE,2647
|
|
65
63
|
ORForise/Tools/StORF-Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
64
|
+
ORForise/Tools/TabToGFF/TabToGFF.py,sha256=RoWOpW1gx7SIcih9MDjRArAF6AXKjRgyV30OCniTnB8,8334
|
|
65
|
+
ORForise/Tools/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
66
66
|
ORForise/Tools/TransDecoder/TransDecoder.py,sha256=l9y4OFxhSdPRBhUprs0yt2fxtSwyNCOv7oKO-aTvpDk,2381
|
|
67
67
|
ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
68
|
-
orforise-1.6.
|
|
69
|
-
orforise-1.6.
|
|
70
|
-
orforise-1.6.
|
|
71
|
-
orforise-1.6.
|
|
72
|
-
orforise-1.6.
|
|
73
|
-
orforise-1.6.
|
|
68
|
+
orforise-1.6.5.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
|
|
69
|
+
orforise-1.6.5.dist-info/METADATA,sha256=tZyLeg5VtXRWYvu_fULSBtUyLd5WG1f-HDWEIB2qJJw,59585
|
|
70
|
+
orforise-1.6.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
|
|
71
|
+
orforise-1.6.5.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
|
|
72
|
+
orforise-1.6.5.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
|
|
73
|
+
orforise-1.6.5.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|