ORForise 1.6.3__tar.gz → 1.6.5__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {orforise-1.6.3 → orforise-1.6.5}/PKG-INFO +7 -7
  2. {orforise-1.6.3 → orforise-1.6.5}/README.md +6 -6
  3. {orforise-1.6.3 → orforise-1.6.5}/pyproject.toml +1 -1
  4. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aggregate_Compare.py +7 -4
  5. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Annotation_Compare.py +7 -4
  6. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Annotation_Intersector.py +89 -76
  7. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Convert_To_GFF.py +41 -10
  8. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/GFF_Adder.py +7 -4
  9. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/List_Tools.py +7 -4
  10. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/StORForise.py +7 -2
  11. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GFF/GFF.py +2 -2
  12. {orforise-1.6.3/src/ORForise/Aux → orforise-1.6.5/src/ORForise/Tools}/TabToGFF/TabToGFF.py +72 -0
  13. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/utils.py +2 -2
  14. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise.egg-info/PKG-INFO +7 -7
  15. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise.egg-info/SOURCES.txt +2 -2
  16. {orforise-1.6.3 → orforise-1.6.5}/LICENSE +0 -0
  17. {orforise-1.6.3 → orforise-1.6.5}/setup.cfg +0 -0
  18. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
  19. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
  20. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/StORF_Undetected.py +0 -0
  21. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/__init__.py +0 -0
  22. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
  23. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
  24. {orforise-1.6.3/src/ORForise/Aux/TabToGFF → orforise-1.6.5/src/ORForise/Aux}/__init__.py +0 -0
  25. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Comparator.py +0 -0
  26. {orforise-1.6.3/src/ORForise/Aux → orforise-1.6.5/src/ORForise/ORForise_Analysis}/__init__.py +0 -0
  27. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
  28. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
  29. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
  30. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
  31. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
  32. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
  33. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
  34. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
  35. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
  36. {orforise-1.6.3/src/ORForise/ORForise_Analysis → orforise-1.6.5/src/ORForise/Tools/Augustus}/__init__.py +0 -0
  37. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
  38. {orforise-1.6.3/src/ORForise/Tools/Augustus → orforise-1.6.5/src/ORForise/Tools/Balrog}/__init__.py +0 -0
  39. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/EasyGene/EasyGene.py +0 -0
  40. {orforise-1.6.3/src/ORForise/Tools/Balrog → orforise-1.6.5/src/ORForise/Tools/EasyGene}/__init__.py +0 -0
  41. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
  42. {orforise-1.6.3/src/ORForise/Tools/EasyGene → orforise-1.6.5/src/ORForise/Tools/FGENESB}/__init__.py +0 -0
  43. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
  44. {orforise-1.6.3/src/ORForise/Tools/FGENESB → orforise-1.6.5/src/ORForise/Tools/FragGeneScan}/__init__.py +0 -0
  45. {orforise-1.6.3/src/ORForise/Tools/FragGeneScan → orforise-1.6.5/src/ORForise/Tools/GFF}/__init__.py +0 -0
  46. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GLIMMER3/GLIMMER3.py +0 -0
  47. {orforise-1.6.3/src/ORForise/Tools/GFF → orforise-1.6.5/src/ORForise/Tools/GLIMMER3}/__init__.py +0 -0
  48. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
  49. {orforise-1.6.3/src/ORForise/Tools/GLIMMER3 → orforise-1.6.5/src/ORForise/Tools/GeneMark}/__init__.py +0 -0
  50. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GeneMarkHA/GeneMarkHA.py +0 -0
  51. {orforise-1.6.3/src/ORForise/Tools/GeneMark → orforise-1.6.5/src/ORForise/Tools/GeneMarkHA}/__init__.py +0 -0
  52. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GeneMarkHMM/GeneMarkHMM.py +0 -0
  53. {orforise-1.6.3/src/ORForise/Tools/GeneMarkHA → orforise-1.6.5/src/ORForise/Tools/GeneMarkHMM}/__init__.py +0 -0
  54. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GeneMarkS/GeneMarkS.py +0 -0
  55. {orforise-1.6.3/src/ORForise/Tools/GeneMarkHMM → orforise-1.6.5/src/ORForise/Tools/GeneMarkS}/__init__.py +0 -0
  56. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/GeneMarkS2/GeneMarkS2.py +0 -0
  57. {orforise-1.6.3/src/ORForise/Tools/GeneMarkS → orforise-1.6.5/src/ORForise/Tools/GeneMarkS2}/__init__.py +0 -0
  58. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
  59. {orforise-1.6.3/src/ORForise/Tools/GeneMarkS2 → orforise-1.6.5/src/ORForise/Tools/MetaGene}/__init__.py +0 -0
  60. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
  61. {orforise-1.6.3/src/ORForise/Tools/MetaGene → orforise-1.6.5/src/ORForise/Tools/MetaGeneAnnotator}/__init__.py +0 -0
  62. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
  63. {orforise-1.6.3/src/ORForise/Tools/MetaGeneAnnotator → orforise-1.6.5/src/ORForise/Tools/MetaGeneMark}/__init__.py +0 -0
  64. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/Prodigal/Prodigal.py +0 -0
  65. {orforise-1.6.3/src/ORForise/Tools/MetaGeneMark → orforise-1.6.5/src/ORForise/Tools/Prodigal}/__init__.py +0 -0
  66. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
  67. {orforise-1.6.3/src/ORForise/Tools/Prodigal → orforise-1.6.5/src/ORForise/Tools/Prokka}/__init__.py +0 -0
  68. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/StORF-Reporter/StORF-Reporter.py +0 -0
  69. {orforise-1.6.3/src/ORForise/Tools/Prokka → orforise-1.6.5/src/ORForise/Tools/StORF-Reporter}/__init__.py +0 -0
  70. {orforise-1.6.3/src/ORForise/Tools/StORF-Reporter → orforise-1.6.5/src/ORForise/Tools/TabToGFF}/__init__.py +0 -0
  71. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
  72. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
  73. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/Tools/__init__.py +0 -0
  74. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise/__init__.py +0 -0
  75. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise.egg-info/dependency_links.txt +0 -0
  76. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise.egg-info/entry_points.txt +0 -0
  77. {orforise-1.6.3 → orforise-1.6.5}/src/ORForise.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ORForise
3
- Version: 1.6.3
3
+ Version: 1.6.5
4
4
  Summary: ORForise - A platform for analysing and comparing genome annotations.
5
5
  Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -662,7 +662,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
662
662
  For Help: ```Annotation-Compare -h ```
663
663
 
664
664
  ```python
665
- ORForise v1.6.3: Annotatione-Compare Run Parameters.
665
+ ORForise v1.6.5: Annotatione-Compare Run Parameters.
666
666
 
667
667
  Required Arguments:
668
668
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -730,7 +730,7 @@ ORForise can be used as the example below.
730
730
  For Help: ```Aggregate-Compare -h ```
731
731
 
732
732
  ```python
733
- ORForise v1.6.3: Aggregate-Compare Run Parameters.
733
+ ORForise v1.6.5: Aggregate-Compare Run Parameters.
734
734
 
735
735
  Required Arguments:
736
736
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -811,7 +811,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
811
811
  For Help: ```GFF-Adder -h ```
812
812
 
813
813
  ```python
814
- ORForise v1.6.3: GFF-Adder Run Parameters.
814
+ ORForise v1.6.5: GFF-Adder Run Parameters.
815
815
 
816
816
  Required Arguments:
817
817
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -868,7 +868,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
868
868
  [-cov COVERAGE] [--report-discordance]
869
869
  [--report-discordance-file REPORT_DISCORDANCE_FILE]
870
870
 
871
- ORForise v1.6.3: Annotation-Intersector Run Parameters
871
+ ORForise v1.6.5: Annotation-Intersector Run Parameters
872
872
 
873
873
  options:
874
874
  -h, --help show this help message and exit
@@ -939,7 +939,7 @@ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_c
939
939
 
940
940
  ```
941
941
 
942
- #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
942
+ #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
943
943
  For Help: ```Convert_To_GFF.py -h ```
944
944
  ```
945
945
  Thank you for using ORForise
@@ -947,7 +947,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
947
947
  #####
948
948
  usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
949
949
 
950
- ORForise v1.6.3: Convert-To-GFF Run Parameters
950
+ ORForise v1.6.5: Convert-To-GFF Run Parameters
951
951
 
952
952
  Required Arguments:
953
953
  -dna GENOME_DNA Genome DNA file (.fa)
@@ -21,7 +21,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
21
21
  For Help: ```Annotation-Compare -h ```
22
22
 
23
23
  ```python
24
- ORForise v1.6.3: Annotatione-Compare Run Parameters.
24
+ ORForise v1.6.5: Annotatione-Compare Run Parameters.
25
25
 
26
26
  Required Arguments:
27
27
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -89,7 +89,7 @@ ORForise can be used as the example below.
89
89
  For Help: ```Aggregate-Compare -h ```
90
90
 
91
91
  ```python
92
- ORForise v1.6.3: Aggregate-Compare Run Parameters.
92
+ ORForise v1.6.5: Aggregate-Compare Run Parameters.
93
93
 
94
94
  Required Arguments:
95
95
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -170,7 +170,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
170
170
  For Help: ```GFF-Adder -h ```
171
171
 
172
172
  ```python
173
- ORForise v1.6.3: GFF-Adder Run Parameters.
173
+ ORForise v1.6.5: GFF-Adder Run Parameters.
174
174
 
175
175
  Required Arguments:
176
176
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -227,7 +227,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
227
227
  [-cov COVERAGE] [--report-discordance]
228
228
  [--report-discordance-file REPORT_DISCORDANCE_FILE]
229
229
 
230
- ORForise v1.6.3: Annotation-Intersector Run Parameters
230
+ ORForise v1.6.5: Annotation-Intersector Run Parameters
231
231
 
232
232
  options:
233
233
  -h, --help show this help message and exit
@@ -298,7 +298,7 @@ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_c
298
298
 
299
299
  ```
300
300
 
301
- #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
301
+ #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
302
302
  For Help: ```Convert_To_GFF.py -h ```
303
303
  ```
304
304
  Thank you for using ORForise
@@ -306,7 +306,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
306
306
  #####
307
307
  usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
308
308
 
309
- ORForise v1.6.3: Convert-To-GFF Run Parameters
309
+ ORForise v1.6.5: Convert-To-GFF Run Parameters
310
310
 
311
311
  Required Arguments:
312
312
  -dna GENOME_DNA Genome DNA file (.fa)
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
4
4
 
5
5
  [project]
6
6
  name = "ORForise"
7
- version = "1.6.3"
7
+ version = "1.6.5"
8
8
  authors = [
9
9
  {name = "Nicholas Dimonaco", email = "nicholas@dimonaco.co.uk"}
10
10
  ]
@@ -337,7 +337,6 @@ def comparator(options):
337
337
 
338
338
 
339
339
  def main():
340
- print(WELCOME)
341
340
 
342
341
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Aggregate-Compare Run Parameters.')
343
342
  parser._action_groups.pop()
@@ -373,6 +372,10 @@ def main():
373
372
  comparator(options)
374
373
 
375
374
  if __name__ == "__main__":
376
- main()
377
- print("Complete")
378
-
375
+ try:
376
+ try:
377
+ main()
378
+ except Exception:
379
+ print('Unhandled exception in main')
380
+ finally:
381
+ print(CLOSING)
@@ -259,8 +259,6 @@ def comparator(options):
259
259
 
260
260
 
261
261
  def main():
262
- print(WELCOME)
263
-
264
262
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Annotatione-Compare Run Parameters.')
265
263
  parser._action_groups.pop()
266
264
 
@@ -313,5 +311,10 @@ def main():
313
311
  comparator(options)
314
312
 
315
313
  if __name__ == "__main__":
316
- main()
317
- print("Complete")
314
+ try:
315
+ try:
316
+ main()
317
+ except Exception:
318
+ logging.exception('Unhandled exception in main')
319
+ finally:
320
+ print(CLOSING)
@@ -26,13 +26,12 @@ def gff_writer(genome_ID, genome_DNA, reference_annotation, reference_tool, ref_
26
26
  if out_dir:
27
27
  os.makedirs(out_dir, exist_ok=True)
28
28
 
29
- # Always open the file and write the header first. Use a broad try so we can log any issue.
30
29
  try:
31
30
  with open(output_file, 'w', encoding='utf-8') as write_out:
32
31
  write_out.write("##gff-version\t3\n#\tAnnotation-Intersector\n#\tRun Date:" + str(date.today()) + '\n')
33
32
  if genome_DNA:
34
33
  write_out.write("##Genome DNA File:" + genome_DNA + '\n')
35
- write_out.write("##Original File: " + reference_annotation + "\n##Intersecting File: " + additional_annotation + '\n')
34
+ write_out.write("##Original File: " + (reference_annotation or '') + "\n##Intersecting File: " + (additional_annotation or '') + '\n')
36
35
 
37
36
  entries_written = 0
38
37
 
@@ -43,7 +42,7 @@ def gff_writer(genome_ID, genome_DNA, reference_annotation, reference_tool, ref_
43
42
 
44
43
  # Parse reference annotation and write features matching gene_ident
45
44
  try:
46
- if reference_annotation.endswith('.gz'):
45
+ if reference_annotation and reference_annotation.endswith('.gz'):
47
46
  rf = gzip.open(reference_annotation, 'rt')
48
47
  else:
49
48
  rf = open(reference_annotation, 'r', encoding='unicode_escape')
@@ -68,10 +67,11 @@ def gff_writer(genome_ID, genome_DNA, reference_annotation, reference_tool, ref_
68
67
  stop = parts[4]
69
68
  strand = parts[6]
70
69
  info = parts[8]
70
+ source = parts[1] if len(parts) > 1 else ''
71
71
  except Exception:
72
72
  continue
73
- # write entry with coverage 0 and empty additional annotation
74
- entry = f"{seqid}\t{os.path.splitext(os.path.basename(reference_annotation))[0]}\t{ftype}\t{start}\t{stop}\t.\t{strand}\t.\tID=Original_Annotation={info};Additional_Annotation=;Coverage=0\n"
73
+ # write entry with coverage 0 and empty additional annotation; use source from input GFF
74
+ entry = f"{seqid}\t{source}\t{ftype}\t{start}\t{stop}\t.\t{strand}\t.\tID=Original_Annotation={info}\n"
75
75
  write_out.write(entry)
76
76
  entries_written += 1
77
77
  except Exception as e:
@@ -81,25 +81,26 @@ def gff_writer(genome_ID, genome_DNA, reference_annotation, reference_tool, ref_
81
81
  logging.info('Wrote %d fallback reference entries to %s', entries_written, output_file)
82
82
  return
83
83
 
84
+ # Iterate contigs and write kept entries. Kept entry layout expected:
85
+ # [strand, coverage, additional_type, ref_type, additional_info, ref_info, ref_source]
84
86
  for contig, genes in genes_To_Keep_by_contig.items():
85
- # Use basename without extension for the source field
86
- ref = os.path.splitext(os.path.basename(reference_annotation))[0].split('_')[0]
87
+ fallback_source = os.path.splitext(os.path.basename(reference_annotation))[0].split('_')[0] if reference_annotation else 'reference'
87
88
  for pos, data in genes.items():
88
89
  try:
89
90
  pos_ = pos.split(',')
90
91
  start = pos_[0]
91
92
  stop = pos_[-1]
92
- strand = data[0]
93
- # Ensure indices exist and are strings
93
+ strand = data[0] if len(data) > 0 else '.'
94
94
  add_ann = str(data[4]) if len(data) > 4 else ''
95
95
  orig_ann = str(data[5]) if len(data) > 5 else ''
96
+ source_field = data[6] if len(data) > 6 and data[6] else fallback_source
97
+ feat_type = data[3] if len(data) > 3 and data[3] else (data[2] if len(data) > 2 else 'CDS')
96
98
  entry = (
97
- contig + '\t' + ref + '\t' + data[2] + '\t' + start + '\t' + stop + '\t.\t' + strand + '\t.\tID=Original_Annotation=' + orig_ann + ';Additional_Annotation=' + add_ann + ';Coverage=' + str(
99
+ contig + '\t' + source_field + '\t' + feat_type + '\t' + start + '\t' + stop + '\t.\t' + strand + '\t.\tID=Original_Annotation=' + orig_ann + ';Additional_Annotation=' + add_ann + ';Coverage=' + str(
98
100
  data[1]) + '\n')
99
101
  write_out.write(entry)
100
102
  entries_written += 1
101
103
  except Exception as e:
102
- # Log the bad entry and continue
103
104
  logging.warning('Skipping bad GFF entry for contig %s pos %s: %s', contig, pos, e)
104
105
  continue
105
106
 
@@ -179,7 +180,9 @@ def _write_discordance_report(report_path, entries):
179
180
 
180
181
 
181
182
  def _write_discordance_gff(report_path, entries, reference_annotation_basename):
182
- """Write a list of discordance entries (dicts) to a GFF file."""
183
+ """Write a list of discordance entries (dicts) to a GFF file.
184
+ The GFF source column is taken from carried 'ref_source' or 'add_source' when available.
185
+ """
183
186
  report_path = os.path.expanduser(report_path)
184
187
  out_dir = os.path.dirname(report_path)
185
188
  if out_dir:
@@ -189,40 +192,35 @@ def _write_discordance_gff(report_path, entries, reference_annotation_basename):
189
192
  fh.write('##gff-version\t3\n')
190
193
  fh.write('#\tAnnotation-Intersector discordance report\n')
191
194
  fh.write('#\tRun Date:' + str(date.today()) + '\n')
192
- fh.write('##Original File: ' + reference_annotation_basename + '\n')
195
+ fh.write('##Original File: ' + (reference_annotation_basename or '') + '\n')
193
196
  entries_written = 0
194
- for e in entries:
197
+ for e in (entries or []):
195
198
  try:
196
199
  contig = str(e.get('contig', '.'))
197
- # prefer reference coords if present
198
200
  ref_pos = e.get('ref_pos', '')
199
201
  add_pos = e.get('add_pos', '')
200
202
  if ref_pos:
201
203
  start, stop = ref_pos.split(',')
202
204
  ftype = e.get('ref_type', '') or 'CDS'
203
- source = reference_annotation_basename.split('_')[0] or 'reference'
205
+ source = e.get('ref_source') or (reference_annotation_basename.split('_')[0] if reference_annotation_basename else 'reference')
204
206
  info_attr = e.get('ref_info', '')
205
207
  else:
206
- # No ref pos, use add_pos coords
207
- start, stop = add_pos.split(',') if add_pos else ('0','0')
208
+ start, stop = add_pos.split(',') if add_pos else ('0', '0')
208
209
  ftype = e.get('add_type', '') or 'CDS'
209
- source = e.get('add_type', '') or 'additional'
210
+ source = e.get('add_source') or 'additional'
210
211
  info_attr = e.get('add_info', '')
211
- # attributes
212
212
  attrs = []
213
213
  attrs.append('Status=' + str(e.get('status', '')))
214
214
  attrs.append('Coverage=' + str(e.get('coverage', '')))
215
215
  if e.get('ref_info', ''):
216
- attrs.append('Ref_info=' + str(e.get('ref_info', '')).replace(';','%3B'))
216
+ attrs.append('Ref_info=' + str(e.get('ref_info', '')).replace(';', '%3B'))
217
217
  if e.get('add_info', ''):
218
- attrs.append('Add_info=' + str(e.get('add_info', '')).replace(';','%3B'))
218
+ attrs.append('Add_info=' + str(e.get('add_info', '')).replace(';', '%3B'))
219
219
  attr_str = ';'.join(attrs)
220
- # construct GFF line
221
220
  line = f"{contig}\t{source}\t{ftype}\t{start}\t{stop}\t.\t.\t.\t{attr_str}\n"
222
221
  fh.write(line)
223
222
  entries_written += 1
224
223
  except Exception:
225
- # skip bad entry
226
224
  continue
227
225
  logging.info('Wrote %d discordance GFF entries to %s', entries_written, report_path)
228
226
  except OSError as e:
@@ -236,12 +234,9 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
236
234
  - only_in_additional: additional ORFs that don't overlap any reference entry
237
235
  - mismatches: reference entries with overlapping additional ORFs that don't meet match criteria
238
236
 
239
- This version is strand-aware and will classify mismatches that are due to strand
240
- differences separately from type/coverage differences.
241
-
242
237
  Expected layouts:
243
- - ref entry: [strand, 'ref', type, info]
244
- - add entry: [strand, ..., type (index 3), info (last element)]
238
+ - ref entry: [strand, 'ref', type, info, source]
239
+ - add entry: [strand, ..., type (index 3), info (last element), (optional) source]
245
240
  """
246
241
  only_in_ref = []
247
242
  only_in_additional = []
@@ -255,7 +250,6 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
255
250
  ref_map = ref_map_by_contig.get(contig, {}) or {}
256
251
  add_map = add_map_by_contig.get(contig, {}) or {}
257
252
 
258
- # For each reference feature, find best overlapping additional ORF and classify
259
253
  for rpos, rdata in ref_map.items():
260
254
  rstart, rstop = _parse_pos(rpos)
261
255
  if rstart is None:
@@ -266,10 +260,10 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
266
260
  best_add_data = None
267
261
  matched = False
268
262
 
269
- # reference fields
270
263
  r_strand = rdata[0] if len(rdata) > 0 else ''
271
- r_type = rdata[3] if len(rdata) > 2 else ''
272
- r_info = rdata[-1] if len(rdata) > 3 else ''
264
+ r_type = rdata[2] if len(rdata) > 2 else ''
265
+ r_info = rdata[3] if len(rdata) > 3 else ''
266
+ r_source = rdata[4] if len(rdata) > 4 else ''
273
267
 
274
268
  for apos, adata in add_map.items():
275
269
  astart, astop = _parse_pos(apos)
@@ -284,27 +278,25 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
284
278
  best_add = apos
285
279
  best_add_data = adata
286
280
 
287
- # additional fields
288
281
  a_strand = adata[0] if len(adata) > 0 else ''
289
282
  a_type = adata[3] if len(adata) > 3 else ''
290
- # frame check (distance of stops mod 3)
283
+ a_info = adata[-1] if len(adata) > 0 else ''
284
+ a_source = adata[4] if len(adata) > 4 else ''
285
+
291
286
  try:
292
287
  frame_ok = ((abs(astop - rstop) % 3) == 0)
293
288
  except Exception:
294
289
  frame_ok = True
295
290
 
296
- # check for a fully satisfactory match: type, coverage, strand and frame
297
291
  if a_type == r_type and cov >= cov_thresh and (a_strand == r_strand) and frame_ok:
298
292
  matched = True
299
293
  matched_adds.add((contig, apos))
300
294
  break
301
295
 
302
296
  if matched:
303
- # good match -> not discordant
304
297
  continue
305
298
 
306
299
  if best_add is None:
307
- # no overlapping additional ORF found
308
300
  only_in_ref.append({
309
301
  'contig': contig,
310
302
  'ref_pos': rpos,
@@ -314,26 +306,24 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
314
306
  'status': 'only_in_ref',
315
307
  'coverage': '0.00',
316
308
  'ref_info': r_info,
309
+ 'ref_source': r_source,
317
310
  'add_info': ''
318
311
  })
319
312
  else:
320
- # overlapping additional ORF(s) exist but none satisfied the match criteria
321
313
  a_type = best_add_data[3] if len(best_add_data) > 3 else ''
322
314
  a_info = best_add_data[-1] if len(best_add_data) > 0 else ''
323
315
  a_strand = best_add_data[0] if len(best_add_data) > 0 else ''
316
+ a_source = best_add_data[4] if len(best_add_data) > 4 else ''
324
317
 
325
- # compute reason flags
326
318
  type_match = (a_type == r_type)
327
319
  strand_match = (a_strand == r_strand)
328
320
  cov_ok = (best_cov >= cov_thresh)
329
321
  try:
330
- # use frame between best add and ref
331
322
  astart, astop = _parse_pos(best_add)
332
323
  frame_ok = ((abs(astop - rstop) % 3) == 0) if (astop is not None) else True
333
324
  except Exception:
334
325
  frame_ok = True
335
326
 
336
- # classify mismatch with strand-awareness
337
327
  if not cov_ok:
338
328
  status = 'found_in_additional_but_below_coverage'
339
329
  elif not type_match and not strand_match:
@@ -356,13 +346,14 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
356
346
  'status': status,
357
347
  'coverage': f"{best_cov:.2f}",
358
348
  'ref_info': r_info,
349
+ 'ref_source': r_source,
359
350
  'add_info': a_info,
351
+ 'add_source': a_source,
360
352
  })
361
353
 
362
354
  if best_add:
363
355
  matched_adds.add((contig, best_add))
364
356
 
365
- # Additional-only ORFs: those not matched and not overlapping any reference
366
357
  for apos, adata in add_map.items():
367
358
  if (contig, apos) in matched_adds:
368
359
  continue
@@ -378,6 +369,7 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
378
369
  overlapped = True
379
370
  break
380
371
  if not overlapped:
372
+ add_source = adata[4] if len(adata) > 4 else ''
381
373
  only_in_additional.append({
382
374
  'contig': contig,
383
375
  'ref_pos': '',
@@ -388,9 +380,11 @@ def compute_discordance(ref_map_by_contig, add_map_by_contig, options):
388
380
  'coverage': '0.00',
389
381
  'ref_info': '',
390
382
  'add_info': adata[-1] if len(adata) > 0 else '',
383
+ 'add_source': add_source,
391
384
  })
392
385
 
393
- return only_in_ref, only_in_additional, mismatches
386
+ # Return discordance lists and the set of matched additional ORFs (for overlap counts)
387
+ return only_in_ref, only_in_additional, mismatches, matched_adds
394
388
 
395
389
 
396
390
  def comparator(options):
@@ -491,11 +485,14 @@ def comparator(options):
491
485
  strand = parts[6]
492
486
  pos = f"{start},{stop}"
493
487
  info = parts[8]
488
+ source = parts[1] if len(parts) > 1 else ''
494
489
  except (IndexError, ValueError):
495
490
  continue
496
491
  if seqid not in ref_genes_by_contig:
497
492
  ref_genes_by_contig[seqid] = OrderedDict()
498
- ref_genes_by_contig[seqid].update({pos: [strand, 'ref', ftype, info]})
493
+ # Store source from column 1 as well. Layout becomes:
494
+ # [strand, 'ref', type, info, source]
495
+ ref_genes_by_contig[seqid].update({pos: [strand, 'ref', ftype, info, source]})
499
496
  else:
500
497
  # Use a tool parser to produce ref_genes; expect tool to return mapping contig->dict
501
498
  try:
@@ -558,12 +555,14 @@ def comparator(options):
558
555
  ref_entry = ref_genes.get(f"{o_Start},{o_Stop}")
559
556
  if not ref_entry:
560
557
  continue
561
- # ref_entry layout: [strand, 'ref', type, info]
562
- ref_type = ref_entry[3] if len(ref_entry) > 2 else ''
563
- ref_info = ref_entry[-1] if len(ref_entry) > 3 else ''
558
+ # ref_entry layout now: [strand, 'ref', type, info, source]
559
+ ref_type = ref_entry[2] if len(ref_entry) > 2 else ''
560
+ ref_info = ref_entry[3] if len(ref_entry) > 3 else ''
561
+ ref_source = ref_entry[4] if len(ref_entry) > 4 else ''
564
562
 
565
563
  if additional_type == ref_type and o_Strand == ref_entry[0]:
566
- kept.update({f"{o_Start},{o_Stop}": [o_Strand, options.coverage, additional_type, ref_type, additional_info, ref_info]})
564
+ # kept layout: [strand, coverage, additional_type, ref_type, additional_info, ref_info, ref_source]
565
+ kept.update({f"{o_Start},{o_Stop}": [o_Strand, options.coverage, additional_type, ref_type, additional_info, ref_info, ref_source]})
567
566
  else:
568
567
  cov_thresh = options.coverage
569
568
  for orf, data in orfs.items():
@@ -598,42 +597,43 @@ def comparator(options):
598
597
  cov = 100.0 * overlap / gene_len
599
598
 
600
599
  g_Strand = r_data[0]
601
- # r_data layout: [strand, 'ref', type, info]
602
- ref_type = r_data[3] if len(r_data) > 2 else ''
603
- ref_info = r_data[-1] if len(r_data) > 3 else ''
600
+ # r_data layout now: [strand, 'ref', type, info, source]
601
+ ref_type = r_data[2] if len(r_data) > 2 else ''
602
+ ref_info = r_data[3] if len(r_data) > 3 else ''
603
+ ref_source = r_data[4] if len(r_data) > 4 else ''
604
604
 
605
605
  if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and cov >= cov_thresh:
606
606
  if additional_type == ref_type:
607
- kept[f"{g_Start},{g_Stop}"] = [g_Strand, int(cov), additional_type, ref_type,
608
- additional_info, ref_info]
607
+ # keep ref_source with the kept entry
608
+ kept[f"{g_Start},{g_Stop}"] = [g_Strand, int(cov), additional_type, ref_type, additional_info, ref_info, ref_source]
609
609
  genes_To_Keep_by_contig[contig] = sortORFs(kept)
610
610
 
611
611
  # Log counts for debugging why GFF might be empty
612
- try:
613
- total_ref = sum(len(v) for v in ref_genes_by_contig.values()) if ref_genes_by_contig else 0
614
- except Exception:
615
- total_ref = 0
616
- try:
617
- total_add = sum(len(v) for v in additional_by_contig.values()) if additional_by_contig else 0
618
- except Exception:
619
- total_add = 0
620
- try:
621
- total_kept = sum(len(v) for v in genes_To_Keep_by_contig.values()) if genes_To_Keep_by_contig else 0
622
- except Exception:
623
- total_kept = 0
624
- logging.info('Reference genes loaded: %d', total_ref)
625
- logging.info('Additional ORFs loaded: %d', total_add)
626
- logging.info('Kept genes after intersection: %d', total_kept)
612
+ # Compute summary metrics (safe/simple)
613
+ total_ref = sum(len(v) for v in ref_genes_by_contig.values()) if ref_genes_by_contig else 0
614
+ total_add = sum(len(v) for v in additional_by_contig.values()) if additional_by_contig else 0
615
+ total_kept = sum(len(v) for v in genes_To_Keep_by_contig.values()) if genes_To_Keep_by_contig else 0
616
+ # Print totals in requested order: reference, additional, then overlap/kept and a percentage
617
+ logging.info('Totals -- reference_genes=%d, additional_genes=%d, overlapping/kept=%d', total_ref, total_add, total_kept)
618
+ if total_ref:
619
+ logging.info('Overlap relative to reference: %.2f%%', (100.0 * total_kept / total_ref))
627
620
 
628
621
  # If requested, compute discordance lists and write three GFF outputs
629
622
  if getattr(options, 'report_discordance', False):
630
- # Compute discordance lists
631
- only_in_ref, only_in_additional, mismatches = compute_discordance(ref_genes_by_contig, additional_by_contig, options)
623
+ # Compute discordance lists and matched additional ORFs
624
+ only_in_ref, only_in_additional, mismatches, matched_adds = compute_discordance(ref_genes_by_contig, additional_by_contig, options)
632
625
  base = os.path.splitext(os.path.basename(options.output_file))[0] if getattr(options, 'output_file', None) else 'discordance'
633
626
  outdir = os.path.dirname(options.output_file) if getattr(options, 'output_file', None) else '.'
634
627
  ref_base = os.path.splitext(os.path.basename(options.reference_annotation))[0]
635
628
 
636
- # Keep the three detailed GFF outputs (backward compatible)
629
+ # Compute and log clear summary metrics
630
+ total_ref = sum(len(v) for v in ref_genes_by_contig.values()) if ref_genes_by_contig else 0
631
+ total_add = sum(len(v) for v in additional_by_contig.values()) if additional_by_contig else 0
632
+ overlapping_additional = len(matched_adds) if matched_adds is not None else 0
633
+ overlapping_reference = max(0, total_ref - (len(only_in_ref) if only_in_ref is not None else 0))
634
+ logging.info('Summary: reference_genes=%d, additional_geness=%d, additional_genes_overlapping_any_reference=%d, reference_genes_overlapped=%d', total_ref, total_add, overlapping_additional, overlapping_reference)
635
+
636
+ # Keep the three detailed GFF outputs (backward compatible)
637
637
  gff_ref = os.path.join(outdir, f"{base}.only_in_reference.gff")
638
638
  gff_add = os.path.join(outdir, f"{base}.only_in_additional.gff")
639
639
  gff_mis = os.path.join(outdir, f"{base}.mismatches.gff")
@@ -679,6 +679,7 @@ def comparator(options):
679
679
  contig_summary = {}
680
680
  logging.info('Kept genes by contig (sample): %s', dict(list(contig_summary.items())[:10]))
681
681
  logging.info('Writing combined GFF to %s', options.output_file)
682
+ # single correct invocation of gff_writer
682
683
  gff_writer(genome_ID, genome_DNA_path, options.reference_annotation, getattr(options, 'reference_tool', None), None, options.additional_annotation, options.additional_tool, genes_To_Keep_by_contig, options.output_file, getattr(options, 'gene_ident', None))
683
684
  logging.info('gff_writer finished (check output file)')
684
685
  except Exception as e:
@@ -688,8 +689,6 @@ def comparator(options):
688
689
 
689
690
 
690
691
  def main():
691
- print(WELCOME)
692
-
693
692
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Annotation-Intersector Run Parameters')
694
693
 
695
694
  required = parser.add_argument_group('Required Arguments')
@@ -721,6 +720,20 @@ def main():
721
720
 
722
721
 
723
722
  if __name__ == '__main__':
724
- main()
725
- print('Complete')
723
+ try:
724
+ try:
725
+ main()
726
+ except Exception:
727
+ logging.exception('Unhandled exception in main')
728
+ finally:
729
+ print(CLOSING)
730
+
731
+
732
+
733
+
734
+
735
+
736
+
737
+
738
+
726
739
 
@@ -4,13 +4,13 @@ from datetime import datetime
4
4
  import os
5
5
  import sys
6
6
 
7
+
7
8
  try:
8
9
  from .utils import *
9
- from .Aux.TabToGFF.TabToGFF import TabToGFF
10
+ from .Tools.TabToGFF.TabToGFF import TabToGFF
10
11
  except (ImportError, ModuleNotFoundError):
11
12
  from utils import *
12
- from ORForise.src.ORForise.Aux.TabToGFF import TabToGFF
13
-
13
+ from Tools.TabToGFF.TabToGFF import TabToGFF
14
14
 
15
15
 
16
16
 
@@ -50,10 +50,37 @@ def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
50
50
  pos_ = pos.split(',')
51
51
  start = pos_[0]
52
52
  stop = pos_[-1]
53
- strand = data['strand']
53
+ strand = data.get('strand', '.')
54
54
  if fmt == 'abricate': # Currently only supports abricate format
55
- info = 'abricate_anotation;accession='+data['accession']+';database='+data['database']+';identity='+str(data['identity'])+';coverage='+str(data['coverage'])+';product='+data['product']+';resistance='+data['resistance']
56
- entry = f"{data['seqid']}\t{fmt}\t{'CDS'}\t{start}\t{stop}\t.\t{strand}\t.\t{'ID='}{info}\n"
55
+ info = 'abricate_annotation;accession={};database={};identity={};coverage={};product={};resistance={}'.format(
56
+ data.get('accession', 'unknown'),
57
+ data.get('database', 'unknown'),
58
+ data.get('identity', ''),
59
+ data.get('coverage', ''),
60
+ data.get('product', ''),
61
+ data.get('resistance', '')
62
+ )
63
+ elif fmt in ('amrfinder', 'amrfinderplus', 'amr'):
64
+ # Build a compact attribute string for amrfinder-plus output
65
+ info = ('amrfinder_annotation;element={};element_name={};protein_id={};type={};class={};subclass={};method={};pct_cov={};pct_id={};closest_acc={};closest_name={}').format(
66
+ data.get('element_symbol', ''),
67
+ data.get('element_name', ''),
68
+ data.get('protein_id', ''),
69
+ data.get('type', ''),
70
+ data.get('class', ''),
71
+ data.get('subclass', ''),
72
+ data.get('method', ''),
73
+ data.get('pct_coverage', ''),
74
+ data.get('pct_identity', ''),
75
+ data.get('closest_accession', ''),
76
+ data.get('closest_name', '')
77
+ )
78
+ else:
79
+ # Generic fallback: try to include any seqid/gene info if present
80
+ gene_id = data.get('gene') or data.get('ID') or ''
81
+ info = f"annotation;id={gene_id}"
82
+
83
+ entry = f"{data.get('seqid', genome_ID)}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{strand}\t.\tID={info}\n"
57
84
  out.write(entry)
58
85
 
59
86
 
@@ -73,15 +100,13 @@ def load_genome(genome_fasta):
73
100
 
74
101
 
75
102
  def main():
76
- print(WELCOME)
77
-
78
103
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': Convert-To-GFF Run Parameters')
79
104
  parser._action_groups.pop()
80
105
 
81
106
  required = parser.add_argument_group('Required Arguments')
82
107
 
83
108
  required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
84
- required.add_argument('-fmt', dest='format', required=True, help='Input format: blast, abricate, genemark')
109
+ required.add_argument('-fmt', dest='format', required=True, help='Input format: amrfinder, abricate, blast')
85
110
  required.add_argument('-o', dest='output_dir', required=True, help='Output directory')
86
111
 
87
112
  optional = parser.add_argument_group('Optional Arguments')
@@ -138,4 +163,10 @@ def main():
138
163
  logging.info('Logfile: %s', logfile)
139
164
 
140
165
  if __name__ == '__main__':
141
- main()
166
+ try:
167
+ try:
168
+ main()
169
+ except Exception:
170
+ logging.exception('Unhandled exception in main')
171
+ finally:
172
+ print(CLOSING)
@@ -496,8 +496,6 @@ def gff_adder(options):
496
496
 
497
497
 
498
498
  def main():
499
- print(WELCOME)
500
-
501
499
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': GFF-Adder Run Parameters.')
502
500
  parser._action_groups.pop()
503
501
 
@@ -539,5 +537,10 @@ def main():
539
537
 
540
538
 
541
539
  if __name__ == "__main__":
542
- main()
543
- print("Complete")
540
+ try:
541
+ try:
542
+ main()
543
+ except Exception:
544
+ logging.exception('Unhandled exception in main')
545
+ finally:
546
+ print(CLOSING)
@@ -11,8 +11,6 @@ except (ImportError, ModuleNotFoundError):
11
11
 
12
12
 
13
13
  def main():
14
- print(WELCOME)
15
-
16
14
  print('ORForise ' + ORForise_Version + ': List Tools Run Parameters')
17
15
 
18
16
  tools = set()
@@ -52,5 +50,10 @@ def main():
52
50
 
53
51
 
54
52
  if __name__ == "__main__":
55
- main()
56
- print("Complete")
53
+ try:
54
+ try:
55
+ main()
56
+ except Exception:
57
+ print('Unhandled exception in main')
58
+ finally:
59
+ print(CLOSING)
@@ -101,7 +101,6 @@ def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare
101
101
 
102
102
 
103
103
  def main():
104
- print(WELCOME)
105
104
  parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': StORForise Run Parameters.')
106
105
  parser.add_argument('-t', '--tool', default='GFF', help='Which tool/format would you analyse with StORF-R?')
107
106
  parser.add_argument('-i', '--input_to_analyse', default='', help='Location of file containing missed genes')
@@ -112,4 +111,10 @@ def main():
112
111
  comparator(**vars(args))
113
112
 
114
113
  if __name__ == "__main__":
115
- main()
114
+ try:
115
+ try:
116
+ main()
117
+ except Exception:
118
+ print('Unhandled exception in main')
119
+ finally:
120
+ print(CLOSING)
@@ -31,7 +31,7 @@ def GFF(*args):
31
31
  genome_rev = revCompIterative(genome)
32
32
  with open(tool_pred, 'r') as gff_input:
33
33
  for line in gff_input:
34
- if '#' not in line:
34
+ if not line.startswith('#'):
35
35
  line = line.split('\t')
36
36
  #gene_types = types.split(',') - Temporary fix
37
37
  #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9: # line[2] for normalrun
@@ -68,7 +68,7 @@ def GFF(*args):
68
68
  stopCodon = genome[stop - 3:stop]
69
69
  po = str(start) + ',' + str(stop)
70
70
  orf = [strand, startCodon, stopCodon, line[2], 'GFF-Standard'] # This needs to detect the type
71
- GFF_ORFs.update({po: orf})
71
+ GFF_ORFs[dna_region].update({po: orf})
72
72
  # elif "CDS" in line[2]:
73
73
  # sys.exit("SAS")
74
74
 
@@ -128,6 +128,76 @@ def parse_genemark(path, genome_seq, gene_ident=None):
128
128
  return results
129
129
 
130
130
 
131
def parse_amrfinderplus(path, genome_seq, gene_ident=None):
    """
    Parse an AMRFinderPlus TSV report into an ordered mapping of hits.

    The first non-empty line that does not start with '#' is taken as the
    header. Every value is then looked up by header name, so the column
    order of the input does not matter.

    Args:
        path: Path to the tab-separated AMRFinderPlus output file.
        genome_seq: Not used by this parser; accepted so the signature
            matches the other parsers dispatched by TabToGFF.
        gene_ident: Not used by this parser; accepted for the same reason.

    Returns:
        collections.OrderedDict keyed by "start,stop" whose values are
        attribute dicts (seqid, start, end, strand, protein_id,
        element_symbol, element_name, type, subtype, class, subclass,
        method, pct_coverage, pct_identity, closest_accession,
        closest_name). A column that is absent from the header yields ''
        for its attribute instead of aborting the parse.

    Rows with fewer fields than the header, or whose Start/Stop are missing
    or not integers, are reported with logging.warning and skipped.
    """
    # attribute key -> header name, for columns that are copied through as text
    text_columns = (
        ('protein_id', 'Protein id'),
        ('element_symbol', 'Element symbol'),
        ('element_name', 'Element name'),
        ('type', 'Type'),
        ('subtype', 'Subtype'),
        ('class', 'Class'),
        ('subclass', 'Subclass'),
        ('method', 'Method'),
        ('pct_coverage', '% Coverage of reference'),
        ('pct_identity', '% Identity to reference'),
        ('closest_accession', 'Closest reference accession'),
        ('closest_name', 'Closest reference name'),
    )

    results = collections.OrderedDict()
    header = None
    header_map = {}

    def column(parts, name):
        # A column missing from the header maps to '' rather than being used
        # as a list index (indexing a list with '' raises TypeError).
        idx = header_map.get(name)
        return '' if idx is None else parts[idx]

    with open(path, 'r') as fh:
        for line_no, line in enumerate(fh, 1):
            line = line.rstrip('\n')
            # Blank lines and '#' comment lines carry no data.
            if not line or line.startswith('#'):
                continue
            if header is None:
                header = line.split('\t')
                header_map = {h.strip(): idx for idx, h in enumerate(header)}
                continue

            parts = line.split('\t')
            # Short rows cannot be indexed safely by header position; longer
            # rows are accepted and the surplus fields are ignored.
            if len(parts) < len(header):
                logging.warning("Line %d: unexpected number of columns in amrfinder line", line_no)
                continue

            try:
                start = int(column(parts, 'Start'))
                end = int(column(parts, 'Stop'))
            except ValueError:
                logging.warning("Line %d: invalid Start/Stop in amrfinder line", line_no)
                continue

            attrs = {
                'seqid': column(parts, 'Contig id'),
                'start': start,
                'end': end,
                'strand': column(parts, 'Strand'),
            }
            for key, name in text_columns:
                attrs[key] = column(parts, name)

            # NOTE(review): the key carries no contig id, so two hits with the
            # same coordinates on different contigs overwrite each other.
            results[f"{start},{end}"] = attrs
    return results
199
+
200
+
131
201
  def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
132
202
  # Should be cleaned up to use consistent format names
133
203
  fmt = fmt.lower()
@@ -137,4 +207,6 @@ def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
137
207
  return parse_abricate(input_file, genome_seq, gene_ident)
138
208
  if fmt in ('genemark', 'gene_mark'):
139
209
  return parse_genemark(input_file, genome_seq, gene_ident)
210
+ if fmt in ('amrfinder', 'amrfinderplus', 'amr'):
211
+ return parse_amrfinderplus(input_file, genome_seq, gene_ident)
140
212
  raise ValueError(f"Unknown format: {fmt}")
@@ -4,8 +4,8 @@ import collections
4
4
  # Constants
5
5
  SHORT_ORF_LENGTH = 300
6
6
  MIN_COVERAGE = 75
7
- ORForise_Version = 'v1.6.1'
8
- WELCOME=("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
7
+ ORForise_Version = 'v1.6.5'
8
+ CLOSING=("\n####\nThank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
9
9
  "Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
10
10
  "#####")
11
11
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ORForise
3
- Version: 1.6.3
3
+ Version: 1.6.5
4
4
  Summary: ORForise - A platform for analysing and comparing genome annotations.
5
5
  Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
6
6
  License: GNU GENERAL PUBLIC LICENSE
@@ -662,7 +662,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
662
662
  For Help: ```Annotation-Compare -h ```
663
663
 
664
664
  ```python
665
- ORForise v1.6.3: Annotatione-Compare Run Parameters.
665
 + ORForise v1.6.5: Annotation-Compare Run Parameters.
666
666
 
667
667
  Required Arguments:
668
668
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -730,7 +730,7 @@ ORForise can be used as the example below.
730
730
  For Help: ```Aggregate-Compare -h ```
731
731
 
732
732
  ```python
733
- ORForise v1.6.3: Aggregate-Compare Run Parameters.
733
+ ORForise v1.6.5: Aggregate-Compare Run Parameters.
734
734
 
735
735
  Required Arguments:
736
736
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -811,7 +811,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
811
811
  For Help: ```GFF-Adder -h ```
812
812
 
813
813
  ```python
814
- ORForise v1.6.3: GFF-Adder Run Parameters.
814
+ ORForise v1.6.5: GFF-Adder Run Parameters.
815
815
 
816
816
  Required Arguments:
817
817
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -868,7 +868,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
868
868
  [-cov COVERAGE] [--report-discordance]
869
869
  [--report-discordance-file REPORT_DISCORDANCE_FILE]
870
870
 
871
- ORForise v1.6.3: Annotation-Intersector Run Parameters
871
+ ORForise v1.6.5: Annotation-Intersector Run Parameters
872
872
 
873
873
  options:
874
874
  -h, --help show this help message and exit
@@ -939,7 +939,7 @@ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_c
939
939
 
940
940
  ```
941
941
 
942
- #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
942
+ #### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
943
943
  For Help: ```Convert_To_GFF.py -h ```
944
944
  ```
945
945
  Thank you for using ORForise
@@ -947,7 +947,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
947
947
  #####
948
948
  usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
949
949
 
950
- ORForise v1.6.3: Convert-To-GFF Run Parameters
950
+ ORForise v1.6.5: Convert-To-GFF Run Parameters
951
951
 
952
952
  Required Arguments:
953
953
  -dna GENOME_DNA Genome DNA file (.fa)
@@ -23,8 +23,6 @@ src/ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py
23
23
  src/ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py
24
24
  src/ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py
25
25
  src/ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py
26
- src/ORForise/Aux/TabToGFF/TabToGFF.py
27
- src/ORForise/Aux/TabToGFF/__init__.py
28
26
  src/ORForise/ORForise_Analysis/__init__.py
29
27
  src/ORForise/ORForise_Analysis/cds_checker.py
30
28
  src/ORForise/ORForise_Analysis/gene_Lenghts.py
@@ -71,5 +69,7 @@ src/ORForise/Tools/Prokka/Prokka.py
71
69
  src/ORForise/Tools/Prokka/__init__.py
72
70
  src/ORForise/Tools/StORF-Reporter/StORF-Reporter.py
73
71
  src/ORForise/Tools/StORF-Reporter/__init__.py
72
+ src/ORForise/Tools/TabToGFF/TabToGFF.py
73
+ src/ORForise/Tools/TabToGFF/__init__.py
74
74
  src/ORForise/Tools/TransDecoder/TransDecoder.py
75
75
  src/ORForise/Tools/TransDecoder/__init__.py
File without changes
File without changes