ORForise 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -4,13 +4,13 @@ from datetime import datetime
4
4
  import os
5
5
  import sys
6
6
 
7
+
7
8
  try:
8
9
  from .utils import *
9
- from .Aux.TabToGFF.TabToGFF import TabToGFF
10
+ from .Tools.TabToGFF.TabToGFF import TabToGFF
10
11
  except (ImportError, ModuleNotFoundError):
11
12
  from utils import *
12
- from ORForise.src.ORForise.Aux.TabToGFF import TabToGFF
13
-
13
+ from Tools.TabToGFF.TabToGFF import TabToGFF
14
14
 
15
15
 
16
16
 
@@ -50,10 +50,37 @@ def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
50
50
  pos_ = pos.split(',')
51
51
  start = pos_[0]
52
52
  stop = pos_[-1]
53
- strand = data['strand']
53
+ strand = data.get('strand', '.')
54
54
  if fmt == 'abricate': # Currently only supports abricate format
55
- info = 'abricate_anotation;accession='+data['accession']+';database='+data['database']+';identity='+str(data['identity'])+';coverage='+str(data['coverage'])+';product='+data['product']+';resistance='+data['resistance']
56
- entry = f"{data['seqid']}\t{fmt}\t{'CDS'}\t{start}\t{stop}\t.\t{strand}\t.\t{'ID='}{info}\n"
55
+ info = 'abricate_annotation;accession={};database={};identity={};coverage={};product={};resistance={}'.format(
56
+ data.get('accession', 'unknown'),
57
+ data.get('database', 'unknown'),
58
+ data.get('identity', ''),
59
+ data.get('coverage', ''),
60
+ data.get('product', ''),
61
+ data.get('resistance', '')
62
+ )
63
+ elif fmt in ('amrfinder', 'amrfinderplus', 'amr'):
64
+ # Build a compact attribute string for amrfinder-plus output
65
+ info = ('amrfinder_annotation;element={};element_name={};protein_id={};type={};class={};subclass={};method={};pct_cov={};pct_id={};closest_acc={};closest_name={}').format(
66
+ data.get('element_symbol', ''),
67
+ data.get('element_name', ''),
68
+ data.get('protein_id', ''),
69
+ data.get('type', ''),
70
+ data.get('class', ''),
71
+ data.get('subclass', ''),
72
+ data.get('method', ''),
73
+ data.get('pct_coverage', ''),
74
+ data.get('pct_identity', ''),
75
+ data.get('closest_accession', ''),
76
+ data.get('closest_name', '')
77
+ )
78
+ else:
79
+ # Generic fallback: try to include any seqid/gene info if present
80
+ gene_id = data.get('gene') or data.get('ID') or ''
81
+ info = f"annotation;id={gene_id}"
82
+
83
+ entry = f"{data.get('seqid', genome_ID)}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{strand}\t.\tID={info}\n"
57
84
  out.write(entry)
58
85
 
59
86
 
@@ -79,7 +106,7 @@ def main():
79
106
  required = parser.add_argument_group('Required Arguments')
80
107
 
81
108
  required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
82
- required.add_argument('-fmt', dest='format', required=True, help='Input format: blast, abricate, genemark')
109
+ required.add_argument('-fmt', dest='format', required=True, help='Input format: amrfinder, abricate, blast')
83
110
  required.add_argument('-o', dest='output_dir', required=True, help='Output directory')
84
111
 
85
112
  optional = parser.add_argument_group('Optional Arguments')
@@ -128,6 +128,76 @@ def parse_genemark(path, genome_seq, gene_ident=None):
128
128
  return results
129
129
 
130
130
 
131
def parse_amrfinderplus(path, genome_seq, gene_ident=None):
    """
    Parse an amrfinder-plus TSV report (header line present).

    The first non-empty line that does not start with '#' is used as the
    header; every later line is read by header column name, so column order
    in the file does not matter.

    Args:
        path: Path of the tab-separated amrfinder-plus report.
        genome_seq: Unused here; accepted so the signature matches the
            other parse_* functions dispatched by TabToGFF.
        gene_ident: Unused here; accepted for the same reason.

    Returns:
        collections.OrderedDict keyed by "start,stop" -> attrs dict, in file
        order. 'start' and 'end' are ints, every other attribute is the raw
        column text, or '' when the report has no such column. A later row
        with the same start/stop replaces the earlier entry.

    Lines with fewer columns than the header, or with a missing/non-integer
    Start or Stop, are skipped with a logged warning.
    """
    # attrs key -> report column name, in the order the attrs are emitted
    # after 'seqid', 'start' and 'end'.
    text_columns = (
        ('strand', 'Strand'),
        ('protein_id', 'Protein id'),
        ('element_symbol', 'Element symbol'),
        ('element_name', 'Element name'),
        ('type', 'Type'),
        ('subtype', 'Subtype'),
        ('class', 'Class'),
        ('subclass', 'Subclass'),
        ('method', 'Method'),
        ('pct_coverage', '% Coverage of reference'),
        ('pct_identity', '% Identity to reference'),
        ('closest_accession', 'Closest reference accession'),
        ('closest_name', 'Closest reference name'),
    )

    results = collections.OrderedDict()
    header_map = None
    n_columns = 0

    with open(path, 'r') as fh:
        for i, line in enumerate(fh, 1):
            line = line.rstrip('\n')
            # Blank lines and '#' comments carry no data.
            if not line or line.startswith('#'):
                continue

            parts = line.split('\t')
            if header_map is None:
                # First data-bearing line is the header.
                header_map = {h.strip(): idx for idx, h in enumerate(parts)}
                n_columns = len(parts)
                continue

            # Short lines are skipped (extra trailing columns are tolerated);
            # this also guarantees every header index is valid for `parts`.
            if len(parts) < n_columns:
                logging.warning(f"Line {i}: unexpected number of columns in amrfinder line")
                continue

            def field(column):
                # A column absent from the header yields '' instead of
                # indexing the row with a non-integer default.
                idx = header_map.get(column)
                return '' if idx is None else parts[idx]

            try:
                start = int(field('Start'))
                end = int(field('Stop'))
            except ValueError:
                logging.warning(f"Line {i}: invalid Start/Stop in amrfinder line")
                continue

            attrs = {'seqid': field('Contig id'), 'start': start, 'end': end}
            for key, column in text_columns:
                attrs[key] = field(column)
            results[f"{start},{end}"] = attrs

    return results
199
+
200
+
131
201
  def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
132
202
  # Should be cleaned up to use consistent format names
133
203
  fmt = fmt.lower()
@@ -137,4 +207,6 @@ def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
137
207
  return parse_abricate(input_file, genome_seq, gene_ident)
138
208
  if fmt in ('genemark', 'gene_mark'):
139
209
  return parse_genemark(input_file, genome_seq, gene_ident)
210
+ if fmt in ('amrfinder', 'amrfinderplus', 'amr'):
211
+ return parse_amrfinderplus(input_file, genome_seq, gene_ident)
140
212
  raise ValueError(f"Unknown format: {fmt}")
ORForise/utils.py CHANGED
@@ -4,7 +4,7 @@ import collections
4
4
  # Constants
5
5
  SHORT_ORF_LENGTH = 300
6
6
  MIN_COVERAGE = 75
7
- ORForise_Version = 'v1.6.4'
7
+ ORForise_Version = 'v1.6.5'
8
8
  CLOSING=("\n####\nThank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
9
9
  "Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
10
10
  "#####")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ORForise
3
- Version: 1.6.4
3
+ Version: 1.6.5
4
4
  Summary: ORForise - A platform for analysing and comparing genome annotations.
5
5
  Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -662,7 +662,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
662
662
  For Help: ```Annotation-Compare -h ```
663
663
 
664
664
  ```python
665
- ORForise v1.6.4: Annotatione-Compare Run Parameters.
665
+ ORForise v1.6.5: Annotatione-Compare Run Parameters.
666
666
 
667
667
  Required Arguments:
668
668
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -730,7 +730,7 @@ ORForise can be used as the example below.
730
730
  For Help: ```Aggregate-Compare -h ```
731
731
 
732
732
  ```python
733
- ORForise v1.6.4: Aggregate-Compare Run Parameters.
733
+ ORForise v1.6.5: Aggregate-Compare Run Parameters.
734
734
 
735
735
  Required Arguments:
736
736
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -811,7 +811,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
811
811
  For Help: ```GFF-Adder -h ```
812
812
 
813
813
  ```python
814
- ORForise v1.6.4: GFF-Adder Run Parameters.
814
+ ORForise v1.6.5: GFF-Adder Run Parameters.
815
815
 
816
816
  Required Arguments:
817
817
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -868,7 +868,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
868
868
  [-cov COVERAGE] [--report-discordance]
869
869
  [--report-discordance-file REPORT_DISCORDANCE_FILE]
870
870
 
871
- ORForise v1.6.4: Annotation-Intersector Run Parameters
871
+ ORForise v1.6.5: Annotation-Intersector Run Parameters
872
872
 
873
873
  options:
874
874
  -h, --help show this help message and exit
@@ -939,7 +939,7 @@ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_c
939
939
 
940
940
  ```
941
941
 
942
- #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
942
+ #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
943
943
  For Help: ```Convert_To_GFF.py -h ```
944
944
  ```
945
945
  Thank you for using ORForise
@@ -947,7 +947,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
947
947
  #####
948
948
  usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
949
949
 
950
- ORForise v1.6.4: Convert-To-GFF Run Parameters
950
+ ORForise v1.6.5: Convert-To-GFF Run Parameters
951
951
 
952
952
  Required Arguments:
953
953
  -dna GENOME_DNA Genome DNA file (.fa)
@@ -2,12 +2,12 @@ ORForise/Aggregate_Compare.py,sha256=AzGOfuQLt4haw4rdCwIEag5Y7hnXHLLApkTa6_j99-A
2
2
  ORForise/Annotation_Compare.py,sha256=7_LwWKDKZHBrhUWODxTJgd-tppaA4k5IvNuX4bU8_2Q,18571
3
3
  ORForise/Annotation_Intersector.py,sha256=7VH7iHk4m1c08AeKf9vGEYuAecsywfC4AQHUlIbgQKQ,35856
4
4
  ORForise/Comparator.py,sha256=59VfUS8d19Xa83o1AsCuowDhhe-iNr5wO4FutDpoQRs,48078
5
- ORForise/Convert_To_GFF.py,sha256=zkpO3vpLxA7EpKe1X1i-_IPbcU3lbwLCsh30mmeuZkI,6030
5
+ ORForise/Convert_To_GFF.py,sha256=N8yKhcbmtYOH3KBZFf1u2BhWOwnuVTU864RJiextnIk,7332
6
6
  ORForise/GFF_Adder.py,sha256=PuOZl4TUN9SbMjGhkuF92UDePAnx0NdVAuWFRxR61XA,28670
7
7
  ORForise/List_Tools.py,sha256=OZadIWAP0HJ_JYlTDqWw_EA8Mkew-26_cKOkRE4i7ro,1618
8
8
  ORForise/StORForise.py,sha256=yRZtKXKcmevxZ_2asesYdkl-qen3MmOn9_r0vb0927I,5772
9
9
  ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
- ORForise/utils.py,sha256=QdXT0XkEIjMbu4ef2HDwAKa_19m8oeu4QV8oLll5gpk,15759
10
+ ORForise/utils.py,sha256=H7zOrQXn7PWzCaqOzRHyP4eG9Y54KnmhKmp9DCP6Ik0,15759
11
11
  ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
12
  ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
13
13
  ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,8 +15,6 @@ ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha2
15
15
  ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
16
  ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
18
- ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
19
- ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
20
18
  ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
19
  ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
22
20
  ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
@@ -63,11 +61,13 @@ ORForise/Tools/Prokka/Prokka.py,sha256=Kcl1ocVj6hPOfEEwf8bBAWhzWX_XAe55kwNUeM8EU
63
61
  ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
62
  ORForise/Tools/StORF-Reporter/StORF-Reporter.py,sha256=BQpFfpXtcNC4C_P4Bk5IZZ9__Xy2VNcbh7zzSDnrNOE,2647
65
63
  ORForise/Tools/StORF-Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
+ ORForise/Tools/TabToGFF/TabToGFF.py,sha256=RoWOpW1gx7SIcih9MDjRArAF6AXKjRgyV30OCniTnB8,8334
65
+ ORForise/Tools/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
66
  ORForise/Tools/TransDecoder/TransDecoder.py,sha256=l9y4OFxhSdPRBhUprs0yt2fxtSwyNCOv7oKO-aTvpDk,2381
67
67
  ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
68
- orforise-1.6.4.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
69
- orforise-1.6.4.dist-info/METADATA,sha256=cJbN2ekkUs5mP8izYLMqxv8r4awotKf6DtVQNDvuPFo,59575
70
- orforise-1.6.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
71
- orforise-1.6.4.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
72
- orforise-1.6.4.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
73
- orforise-1.6.4.dist-info/RECORD,,
68
+ orforise-1.6.5.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
69
+ orforise-1.6.5.dist-info/METADATA,sha256=tZyLeg5VtXRWYvu_fULSBtUyLd5WG1f-HDWEIB2qJJw,59585
70
+ orforise-1.6.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
71
+ orforise-1.6.5.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
72
+ orforise-1.6.5.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
73
+ orforise-1.6.5.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (80.10.1)
2
+ Generator: setuptools (80.10.2)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
File without changes