ORForise 1.6.0__tar.gz → 1.6.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. {orforise-1.6.0 → orforise-1.6.1}/PKG-INFO +216 -229
  2. orforise-1.6.1/README.md +397 -0
  3. {orforise-1.6.0 → orforise-1.6.1}/pyproject.toml +6 -4
  4. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Aggregate_Compare.py +2 -4
  5. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Annotation_Compare.py +4 -7
  6. orforise-1.6.1/src/ORForise/Annotation_Intersector.py +726 -0
  7. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Convert_To_GFF.py +6 -5
  8. orforise-1.6.1/src/ORForise/GFF_Adder.py +543 -0
  9. orforise-1.6.1/src/ORForise/List_Tools.py +63 -0
  10. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/StORForise.py +8 -4
  11. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/EasyGene/EasyGene.py +13 -1
  12. orforise-1.6.0/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py → orforise-1.6.1/src/ORForise/Tools/GLIMMER3/GLIMMER3.py +2 -2
  13. orforise-1.6.0/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py → orforise-1.6.1/src/ORForise/Tools/GeneMarkHA/GeneMarkHA.py +1 -1
  14. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Prodigal/Prodigal.py +13 -1
  15. orforise-1.6.1/src/ORForise/__init__.py +0 -0
  16. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/utils.py +4 -1
  17. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/PKG-INFO +216 -229
  18. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/SOURCES.txt +15 -13
  19. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/entry_points.txt +4 -2
  20. orforise-1.6.0/README.md +0 -410
  21. orforise-1.6.0/src/ORForise/GFF_Adder.py +0 -268
  22. orforise-1.6.0/src/ORForise/GFF_Intersector.py +0 -192
  23. {orforise-1.6.0 → orforise-1.6.1}/LICENSE +0 -0
  24. {orforise-1.6.0 → orforise-1.6.1}/setup.cfg +0 -0
  25. {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
  26. {orforise-1.6.0/src/ORForise/ORForise_Analysis → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
  27. {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
  28. {orforise-1.6.0/src/ORForise/Tools/Augustus → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected}/__init__.py +0 -0
  29. {orforise-1.6.0/src/ORForise/Tools/Balrog → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
  30. {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
  31. {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/TabToGFF/TabToGFF.py +0 -0
  32. {orforise-1.6.0/src/ORForise/Tools/EasyGene → orforise-1.6.1/src/ORForise/Aux/TabToGFF}/__init__.py +0 -0
  33. {orforise-1.6.0/src/ORForise/Tools/FGENESB → orforise-1.6.1/src/ORForise/Aux}/__init__.py +0 -0
  34. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Comparator.py +0 -0
  35. {orforise-1.6.0/src/ORForise/Tools/FragGeneScan → orforise-1.6.1/src/ORForise/ORForise_Analysis}/__init__.py +0 -0
  36. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
  37. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
  38. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
  39. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
  40. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
  41. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
  42. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
  43. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
  44. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
  45. {orforise-1.6.0/src/ORForise/Tools/GFF → orforise-1.6.1/src/ORForise/Tools/Augustus}/__init__.py +0 -0
  46. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
  47. {orforise-1.6.0/src/ORForise/Tools/GLIMMER_3 → orforise-1.6.1/src/ORForise/Tools/Balrog}/__init__.py +0 -0
  48. {orforise-1.6.0/src/ORForise/Tools/GeneMark → orforise-1.6.1/src/ORForise/Tools/EasyGene}/__init__.py +0 -0
  49. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
  50. {orforise-1.6.0/src/ORForise/Tools/GeneMark_HA → orforise-1.6.1/src/ORForise/Tools/FGENESB}/__init__.py +0 -0
  51. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
  52. {orforise-1.6.0/src/ORForise/Tools/GeneMark_HMM → orforise-1.6.1/src/ORForise/Tools/FragGeneScan}/__init__.py +0 -0
  53. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GFF/GFF.py +0 -0
  54. {orforise-1.6.0/src/ORForise/Tools/GeneMark_S → orforise-1.6.1/src/ORForise/Tools/GFF}/__init__.py +0 -0
  55. {orforise-1.6.0/src/ORForise/Tools/GeneMark_S_2 → orforise-1.6.1/src/ORForise/Tools/GLIMMER3}/__init__.py +0 -0
  56. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
  57. {orforise-1.6.0/src/ORForise/Tools/MetaGene → orforise-1.6.1/src/ORForise/Tools/GeneMark}/__init__.py +0 -0
  58. {orforise-1.6.0/src/ORForise/Tools/MetaGeneAnnotator → orforise-1.6.1/src/ORForise/Tools/GeneMarkHA}/__init__.py +0 -0
  59. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
  60. {orforise-1.6.0/src/ORForise/Tools/MetaGeneMark → orforise-1.6.1/src/ORForise/Tools/GeneMark_HMM}/__init__.py +0 -0
  61. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
  62. {orforise-1.6.0/src/ORForise/Tools/Prodigal → orforise-1.6.1/src/ORForise/Tools/GeneMark_S}/__init__.py +0 -0
  63. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
  64. {orforise-1.6.0/src/ORForise/Tools/Prokka → orforise-1.6.1/src/ORForise/Tools/GeneMark_S_2}/__init__.py +0 -0
  65. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
  66. {orforise-1.6.0/src/ORForise/Tools/StORF_Reporter → orforise-1.6.1/src/ORForise/Tools/MetaGene}/__init__.py +0 -0
  67. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
  68. {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected/Completely_Undetected → orforise-1.6.1/src/ORForise/Tools/MetaGeneAnnotator}/__init__.py +0 -0
  69. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
  70. {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected → orforise-1.6.1/src/ORForise/Tools/MetaGeneMark}/__init__.py +0 -0
  71. {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes → orforise-1.6.1/src/ORForise/Tools/Prodigal}/__init__.py +0 -0
  72. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
  73. {orforise-1.6.0/src/ORForise/Tools/TabToGFF → orforise-1.6.1/src/ORForise/Tools/Prokka}/__init__.py +0 -0
  74. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +0 -0
  75. {orforise-1.6.0/src/ORForise/Tools/TransDecoder → orforise-1.6.1/src/ORForise/Tools/StORF_Reporter}/__init__.py +0 -0
  76. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
  77. {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Tools/TransDecoder}/__init__.py +0 -0
  78. {orforise-1.6.0/src/ORForise → orforise-1.6.1/src/ORForise/Tools}/__init__.py +0 -0
  79. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/dependency_links.txt +0 -0
  80. {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/top_level.txt +0 -0
@@ -1,7 +1,7 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: ORForise
3
- Version: 1.6.0
4
- Summary: ORForise - Platform for analysing and comparing gene predictions.
3
+ Version: 1.6.1
4
+ Summary: ORForise - A platform for analysing and comparing genome annotations.
5
5
  Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
6
6
  License: GNU GENERAL PUBLIC LICENSE
7
7
  Version 3, 29 June 2007
@@ -639,61 +639,42 @@ Description-Content-Type: text/markdown
639
639
  License-File: LICENSE
640
640
  Dynamic: license-file
641
641
 
642
- # ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
642
+ # ORForise - Genome Annotation Analysis and Comparison Platform
643
643
  ## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
644
- ### Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
645
- ### Novel genome annotations can be compared to a provided reference annotation from Ensembl and predictions from other tools (or any given GFF annotation) .
646
644
 
647
645
  # Requirements and Installation:
648
646
 
649
- ### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
647
+ ### The ORForise platform is written in Python (3.6-3.*) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
650
648
 
651
649
  ## Installation:
652
650
 
653
- ### The ORForise platform is available via the pip Python package manager ```pip3 install ORForise```.
654
- ### Consider using '--no-cache-dir' with pip to ensure the download of the newest version of the package.
655
-
656
- ## Required Files:
657
-
658
- To run, you need:
659
- * Input Genome FASTA and corresponding GFF file (or CDS predictions with the annotated genes for the genome you want to use as reference in one of the tool output formats listed below).
660
- * A prediction output from one of the compatible tools for the same genome.
661
-
662
- ### How to add your own Genome:
663
-
664
- Corresponding FASTA and GFF files must be provided for the genome the analysis is to be performed on, including the corresponding output of any tools to compare.
665
-
666
- ### How to add your own tool:
667
-
668
- If the new tool reports its predictions in GFF you can present ORForise with "GFF" for either the reference ```-rt``` or prediction ```-t``` option.
669
- If the tool uses another non-standard format, a request can be made to add it as an option via GitHub.
670
-
651
+ ### ORForise is available via the pip Python package manager ```pip3 install ORForise``` and bioconda ```conda install -c bioconda ORForise```.
671
652
 
672
653
  ### Testing:
673
654
  Precomputed testing and data which includes example input and output files for all tools presented below is available in the `~ORForise/Testing` directory of the GitHub repository.
674
- Example output files from ```Annotation-Compare```, ```GFF-Adder``` and ```GFF-Intersector``` are made available to validate installation.
655
+ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```Convert-To-GFF``` and ```Annotation-Intersector``` are available.
675
656
 
676
657
 
677
- ## CDS Prediction Analysis:
658
+ ## Genome Annotation Analysis:
678
659
 
679
660
  ### Use-cases: (Running if via pip)
680
661
 
681
662
  For Help: ```Annotation-Compare -h ```
682
663
 
683
664
  ```python
684
- ORForise v1.6.0: Annotatione-Compare Run Parameters.
665
+ ORForise v1.6.1: Annotatione-Compare Run Parameters.
685
666
 
686
667
  Required Arguments:
687
668
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
688
669
  -ref REFERENCE_ANNOTATION
689
670
  Which reference annotation file to use as reference?
690
- -t TOOL Which tool to analyse? (Prodigal)
691
- -tp TOOL_PREDICTION Tool genome prediction file (.gff) - Different Tool Parameters are compared individually via
692
- separate files
671
+ -t TOOL Which tool to analyse?
672
+ -tp TOOL_PREDICTION Tool genome prediction file (.gff) - Different Tool Parameters are compared individually via separate files
693
673
 
694
674
  Optional Arguments:
695
- -rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool
696
- name to compare output from two tools
675
+ -gene_ident GENE_IDENT
676
+ What features to consider as genes? - Default: CDS - Provide comma separated list of features to consider as genes (e.g. CDS,exon)
677
+ -rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool name to compare output from two tools
697
678
 
698
679
  Output:
699
680
  -o OUTDIR Define directory where detailed output should be places
@@ -702,18 +683,42 @@ Output:
702
683
  Misc:
703
684
  -v {True,False} Default - False: Print out runtime status
704
685
  ```
686
+ ### Compare a *de novo* genome annotation to an Ensembl annotation:
705
687
 
706
- ## Compare a novel genome annotation to an Ensembl annotation:
707
-
708
- Genome annotation is a difficult process, even for Prokaryotes. ORForise allows the direct and systematic analysis of
709
- a novel CDS prediction from a wide selection of tools to a reference Genome Annotation, such as those provided by
688
+ Genome annotation is a difficult process, even for Prokaryotes. ORForise allows for the direct and systematic analysis of
689
+ *de novo* gene prediction from a wide selection of tools to a reference Genome Annotation, such as those provided by
710
690
  Ensembl Bacteria.
711
691
 
712
- #### Example: Installation through pip will allow user to call the programs directly from the ORForise package.
692
+ #### Example: Installation through pip will allow user to call the programs directly from the ORForise package (Prodigal and Pyrodigal provide annotations in the same format).
713
693
  ```python
714
- Annotation-Compare -dna ~/Testing/Myco.fa -ref ~/Testing/Myco.gff -t Prodigal -tp ~/Testing/Prodigal_Myco.gff
694
+ Annotation-Compare -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -t Prodigal -tp ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff
695
+ ```
696
+ ### Example Output: - See ```~/Test_Data/Genomes/E-coli/annotation_compare```
697
+ ```commandline
698
+ Genome Used: Escherichia_coli.fasta
699
+ Reference Used: Escherichia_coli.gff
700
+ Tool Compared: Prodigal
701
+ Total Number of Reference Genes: 5222
702
+ Number of Contigs: 4
703
+ Contig Genes ORFs Perfect_Matches Partial_Matches Missed_Genes Unmatched_ORFs Multi_Matched_ORFs
704
+ ERS715463SCcontig000003 4068 4070 4065 1 2 4 0
705
+ ERS715463SCcontig000002 1033 1035 1033 0 0 2 0
706
+ ERS715463SCcontig000001 75 77 75 0 0 2 0
707
+ ERS715463SCcontig000004 46 47 45 1 0 1 0
708
+
709
+ Overall Summary:
710
+ Number of Genes: 5222
711
+ Number of ORFs: 5229
712
+ Perfect Matches: 5218 [5222] - 99.92%
713
+ Partial Matches: 2 [5222] - 0.04%
714
+ Missed Genes: 2 [5222] - 0.04%
715
+ Unmatched ORFs: 9 [5222] - 0.17%
716
+ Multi-matched ORFs: 0 [5222] - 0.00%
717
+
715
718
  ```
716
- ### Compare different novel annotations with each other on a single Genome:
719
+
720
+
721
+ ## Compare different novel annotations with each other on a single Genome:
717
722
 
718
723
  If a reference Genome Annotation is not available or a direct comparison between two or more tools is wanted,
719
724
  ORForise can be used as the example below.
@@ -725,253 +730,235 @@ ORForise can be used as the example below.
725
730
  For Help: ```Aggregate-Compare -h ```
726
731
 
727
732
  ```python
728
- ORForise v1.6.0: Aggregate-Compare Run Parameters.
733
+ ORForise v1.6.1: Aggregate-Compare Run Parameters.
729
734
 
730
735
  Required Arguments:
731
736
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
732
- -t TOOLS Which tools to analyse? (Prodigal,GeneMarkS)
737
+ -t TOOLS Which tools to analyse?
733
738
  -tp TOOL_PREDICTIONS Tool genome prediction file (.gff) - Providefile locations for each tool comma separated
734
739
  -ref REFERENCE_ANNOTATION
735
740
  Which reference annotation file to use as reference?
736
741
 
737
742
  Optional Arguments:
738
- -rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool
739
- name to compare output from two tools
743
+ -gene_ident GENE_IDENT
744
+ What features to consider as genes? - Default: CDS - Provide comma separated list of features to consider as genes (e.g. CDS,exon)
745
+ -rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool name to compare output from two tools
740
746
 
741
747
  Output:
742
- -o OUTNAME Define full output filename (format is CSV) - If not provided, summary will be printed to
743
- std-out
748
+ -o OUTDIR Define directory where detailed output should be places - If not provided, summary will be printed to std-out
749
+ -n OUTNAME Define output file name - Mandatory is -o is provided: <outname>_<contig_id>_ORF_Comparison.csv
744
750
 
745
751
  Misc:
746
752
  -v {True,False} Default - False: Print out runtime status
753
+
747
754
  ```
748
755
 
749
756
  #### Example:
750
757
  ```python
751
- Aggregate-Compare -ref ~/Testing/Myco.gff -dna ~/Testing/Myco.fa -t Prodigal,TransDecoder,GeneMark_S_2 -tp ~/Testing/Prodigal_Myco.gff,~/Testing/TransDecoder_Myco.gff,~/Testing/GeneMark_S_2_Myco.gff
758
+ Aggregate-Compare -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -t Prodigal,GeneMarkS2 -tp ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff,~/Test_Data/Genomes/E-coli/GeneMarkS2_E-coli.gff
752
759
  ```
753
- This will compare the Aggregate the predictions of Prodigal, TransDecoder and GLIMMER 3 against the Mycoplasma reference annotation provided by
754
- Ensembl Bacteria.
760
+ This will compare and aggregate the predictions of Prodigal and GeneMarkS2 against the E-coli reference annotation provided by Ensembl Bacteria.
755
761
 
756
- ## Annotation Comparison Output - The output format is the same for Annotation_Compare and Aggregate_Compare:
757
- ### Print to screen example - Prodigal prediction compared to Ensembl Bacteria reference annotation of *Escherichia coli*:
762
+ ### Annotation Comparison Output - The output format is the same for Annotation_Compare and Aggregate_Compare: See ```~/Test_Data/Genomes/E-coli/aggregate_compare```
758
763
  ```bash
759
- Annotation-Compare.py -ref ./Testing/Myco.gff -dna ./Testing/Myco.fa -t Prodigal -tp ./Testing/Prodigal_Myco.gff
760
- Genome Used: Myco
761
- Reference Used: Testing/Myco.gff
762
- Tool Compared: Prodigal
763
- Perfect Matches:128[476] -26.89%
764
- Partial Matches:62[476] - 13.03%
765
- Missed Genes:286[476] - 60.08%
766
- Complete
767
- ```
768
-
769
- ``` bash
770
- Aggregate-Compare -ref ./Testing/Myco.gff -dna ./Testing/Myco.fa -t Prodigal,TransDecoder,GeneMark_S_2 -tp ./Testing/Prodigal_Myco.gff,./Testing/TransDecoder_Myco.gff,./Testing/GeneMark_S_2_Myco.gff
771
- Prodigal
772
- TransDecoder
773
- GeneMark_S_2
774
- Match filtered out
775
- Match filtered out
776
- Match filtered out
777
- Match filtered out
778
- Match filtered out
779
- Match filtered out
780
- Genome Used: Myco
781
- Reference Used: ./Testing/Myco.gff
782
- Tools Compared: Prodigal,TransDecoder,GeneMark_S_2
783
- Perfect Matches:132[476]
784
- Partial Matches:58[476]
785
- Missed Genes:286[476]
764
+ Genome Used: Escherichia_coli.fasta
765
+ Reference Used: Escherichia_coli.gff
766
+ Tool Compared: Prodigal,GeneMarkS2
767
+ Total Number of Reference Genes: 5222
768
+ Number of Contigs: 4
769
+ Contig Genes ORFs Perfect_Matches Partial_Matches Missed_Genes Unmatched_ORFs Multi_Matched_ORFs
770
+ ERS715463SCcontig000003 4068 4500 4065 1 2 434 0
771
+ ERS715463SCcontig000002 1033 1148 1033 0 0 115 0
772
+ ERS715463SCcontig000001 75 92 75 0 0 17 0
773
+ ERS715463SCcontig000004 46 64 45 1 0 18 0
774
+
775
+ Overall Summary:
776
+ Number of Genes: 5222
777
+ Number of ORFs: 5804
778
+ Perfect Matches: 5218 [5222] - 99.92%
779
+ Partial Matches: 2 [5222] - 0.04%
780
+ Missed Genes: 2 [5222] - 0.04%
781
+ Unmatched ORFs: 584 [5222] - 11.18%
782
+ Multi-matched ORFs: 0 [5222] - 0.00%
783
+
784
+ Prodigal: Perfect=5218, Partial=2, Unmatched=9, Multi-matched=0
785
+
786
+ GeneMarkS2: Perfect=4609, Partial=2, Unmatched=579, Multi-matched=0
786
787
  ```
787
788
 
788
- This is the default output of the comparison tools.
789
-
790
- ### '-o' Example output to CSV file - Prodigal prediction compared to Ensembl Bacteria reference annotation of *Escherichia coli*:
791
- The output is designed to be human-readable and interpretable by the included 'ORForise_Analysis' scripts.
792
- The example below presents the 12 'Representative' and 72 'All' Metrics but only shows one entry for each of the induvidual prediction reports (Perfect_Match_Genes,Partial_Match_Genes,Missed_Genes,Predicted_CDS_Without_Corresponding_Gene_in_Reference,Predicted_CDSs_Which_Detected_more_than_one_Gene).
793
-
794
- ```csv
789
+ The outputs shown so far are the summary outputs of the comparison tools.
790
+ Since v 1.5.0, detailed CSV outputs are also provided for each contig analysed - See ```~/Test_Data/Genomes/E-coli/annotation_compare``` for example outputs.
791
+ ```commandline
795
792
  Representative_Metrics:
796
793
  Percentage_of_Genes_Detected,Percentage_of_ORFs_that_Detected_a_Gene,Percent_Difference_of_All_ORFs,Median_Length_Difference,Percentage_of_Perfect_Matches,Median_Start_Difference_of_Matched_ORFs,Median_Stop_Difference_of_Matched_ORFs,Percentage_Difference_of_Matched_Overlapping_CDSs,Percent_Difference_of_Short-Matched-ORFs,Precision,Recall,False_Discovery_Rate
797
- 39.92,19.10,109.03,-62.17,67.37,67.5,-85.5,-83.71,-17.39,0.19,0.40,0.81
798
- All_Metrics:
794
+ 100.00,97.87,2.17,1.20,97.83,6.0,N/A,0.00,0.00,0.98,1.00,0.02
795
+ Prediction_Metrics:
799
796
  Number_of_ORFs,Percent_Difference_of_All_ORFs,Number_of_ORFs_that_Detected_a_Gene,Percentage_of_ORFs_that_Detected_a_Gene,Number_of_Genes_Detected,Percentage_of_Genes_Detected,Median_Length_of_All_ORFs,Median_Length_Difference,Minimum_Length_of_All_ORFs,Minimum_Length_Difference,Maximum_Length_of_All_ORFs,Maximum_Length_Difference,Median_GC_content_of_All_ORFs,Percent_Difference_of_All_ORFs_Median_GC,Median_GC_content_of_Matched_ORFs,Percent_Difference_of_Matched_ORF_GC,Number_of_ORFs_which_Overlap_Another_ORF,Percent_Difference_of_Overlapping_ORFs,Maximum_ORF_Overlap,Median_ORF_Overlap,Number_of_Matched_ORFs_Overlapping_Another_ORF,Percentage_Difference_of_Matched_Overlapping_CDSs,Maximum_Matched_ORF_Overlap,Median_Matched_ORF_Overlap,Number_of_Short-ORFs,Percent_Difference_of_Short-ORFs,Number_of_Short-Matched-ORFs,Percent_Difference_of_Short-Matched-ORFs,Number_of_Perfect_Matches,Percentage_of_Perfect_Matches,Number_of_Perfect_Starts,Percentage_of_Perfect_Starts,Number_of_Perfect_Stops,Percentage_of_Perfect_Stops,Number_of_Out_of_Frame_ORFs,Number_of_Matched_ORFs_Extending_a_Coding_Region,Percentage_of_Matched_ORFs_Extending_a_Coding_Region,Number_of_Matched_ORFs_Extending_Start_Region,Percentage_of_Matched_ORFs_Extending_Start_Region,Number_of_Matched_ORFs_Extending_Stop_Region,Percentage_of_Matched_ORFs_Extending_Stop_Region,Number_of_All_ORFs_on_Positive_Strand,Percentage_of_All_ORFs_on_Positive_Strand,Number_of_All_ORFs_on_Negative_Strand,Percentage_of_All_ORFs_on_Negative_Strand,Median_Start_Difference_of_Matched_ORFs,Median_Stop_Difference_of_Matched_ORFs,ATG_Start_Percentage,GTG_Start_Percentage,TTG_Start_Percentage,ATT_Start_Percentage,CTG_Start_Percentage,Other_Start_Codon_Percentage,TAG_Stop_Percentage,TAA_Stop_Percentage,TGA_Stop_Percentage,Other_Stop_Codon_Percentage,True_Positive,False_Positive,False_Negative,Precision,Recall,False_Discovery_Rate,Nucleotide_True_Positive,Nucleotide_False_Positive,Nucleotide_True_Negative,Nucleotide_False_Negative,N
ucleotide_Precision,Nucleotide_Recall,Nucleotide_False_Discovery_Rate,ORF_Nucleotide_Coverage_of_Genome,Matched_ORF_Nucleotide_Coverage_of_Genome
800
- 995,109.03,190,19.10,190,39.92,335.0,-62.17,89,-21.24,3152,-41.81,31.50,0.20,32.83,4.42,279,26.24,135,0.00,36,-83.71,31,4.50,443,1826.09,19,-17.39,128,67.37,162,85.26,154,81.05,0,0,0.00,4,2.11,0,0.00,570,0.57,425,0.43,67.5,-85.5,63.12,15.28,21.61,0.00,0.00,0.00,11.06,27.44,61.51,0.00,0.40,1.69,0.60,0.19,0.40,0.81,0.82,0.31,0.69,0.18,0.96,0.82,0.04,77.15,24.47
801
- CDS_Gene_Coverage_of_Genome:
802
- 90.62
803
- Start_Position_Difference:
804
- -78,33,93,294,144,408,3,18,156,-42,45,90,333,333,-39,111,201,93,120,-354,-150,-366,117,-138,-240,123,-153,-51
805
- Stop_Position_Difference:
806
- -192,-147,108,-216,87,-678,-96,-156,-321,-240,-168,-162,-51,-126,-33,-3,-93,-12,-204,-189,-156,237,-45,-219,-201,-537,-30,-78,159,243,60,21,15,183,288,6
807
- Alternative_Starts_Predicted:
808
-
809
- Alternative_Stops_Predicted:
810
-
811
- Undetected_Gene_Metrics:
812
- ATG_Start ,GTG_Start ,TTG_Start ,ATT_Start ,CTG_Start ,Alternative_Start_Codon ,TGA_Stop ,TAA_Stop ,TAG_Stop ,Alternative_Stop_Codon ,Median_Length ,ORFs_on_Positive_Strand ,ORFs_on_Negative_Strand
813
- 88.46,7.69,3.85,0.00,0.00,0.00,0.00,74.13,25.87,0.00,1047.50,156,130
814
- Perfect_Match_Genes:
815
- >Myco_686_1828_+
816
- ATGAAAATATTAATTAATAAAAGTGAATTGAATAAAATTTTGAAAAAAATGAATAACGTTATTATTTCCAATAACAAAATAAAACCACATCATTCATATTTTTTAATAGAGGCAAAAGAAAAAGAAATAAACTTTTATGCTAACAATGAATACTTTTCTGTCAAATGTAATTTAAATAAAAATATTGATATTCTTGAACAAGGCTCCTTAATTGTTAAAGGAAAAATTTTTAACGATCTTATTAATGGCATAAAAGAAGAGATTATTACTATTCAAGAAAAAGATCAAACACTTTTGGTTAAAACAAAAAAAACAAGTATTAATTTAAACACAATTAATGTGAATGAATTTCCAAGAATAAGGTTTAATGAAAAAAACGATTTAAGTGAATTTAATCAATTCAAAATAAATTATTCACTTTTAGTAAAAGGCATTAAAAAAATTTTTCACTCAGTTTCAAATAATCGTGAAATATCTTCTAAATTTAATGGAGTAAATTTCAATGGATCCAATGGAAAAGAAATATTTTTAGAAGCTTCTGACACTTATAAACTATCTGTTTTTGAGATAAAGCAAGAAACAGAACCATTTGATTTCATTTTGGAGAGTAATTTACTTAGTTTCATTAATTCTTTTAATCCTGAAGAAGATAAATCTATTGTTTTTTATTACAGAAAAGATAATAAAGATAGCTTTAGTACAGAAATGTTGATTTCAATGGATAACTTTATGATTAGTTACACATCGGTTAATGAAAAATTTCCAGAGGTAAACTACTTTTTTGAATTTGAACCTGAAACTAAAATAGTTGTTCAAAAAAATGAATTAAAAGATGCACTTCAAAGAATTCAAACTTTGGCTCAAAATGAAAGAACTTTTTTATGCGATATGCAAATTAACAGTTCTGAATTAAAAATAAGAGCTATTGTTAATAATATCGGAAATTCTCTTGAGGAAATTTCTTGTCTTAAATTTGAAGGTTATAAACTTAATATTTCTTTTAACCCAAGTTCTCTATTAGATCACATAGAGTCTTTTGAATCAAATGAAATAAATTTTGATTTCCAAGGAAATAGTAAGTATTTTTTGATAACCTCTAAAAGTGAACCTGAACTTAAGCAAATATTGGTTCCTTCAAGATAA
817
-
818
- >Myco_4812_7322_+
819
- ATGGCAAAGCAACAAGATCAAGTAGATAAGATTCGTGAAAACTTAGACAATTCAACTGTCAAAAGTATTTCATTAGCAAATGAACTTGAGCGTTCATTCATGGAATATGCTATGTCAGTTATTGTTGCTCGTGCTTTACCTGATGCTAGAGATGGACTTAAACCAGTTCATCGTCGTGTTCTTTATGGTGCTTATATTGGTGGCATGCACCATGATCGTCCTTTTAAAAAGTCTGCGAGGATTGTTGGTGATGTAATGAGTAAATTCCACCCTCATGGTGATATGGCAATATATGACACCATGTCAAGAATGGCTCAAGACTTTTCATTAAGATACCTTTTAATTGATGGTCATGGTAATTTTGGTTCTATAGATGGTGATAGACCTGCTGCACAACGTTATACAGAAGCAAGATTATCTAAACTTGCAGCAGAACTTTTAAAAGATATTGATAAAGATACAGTTGACTTTATTGCTAATTATGATGGTGAGGAAAAAGAACCAACTGTTCTACCAGCAGCTTTCCCTAACTTACTTGCAAATGGTTCTAGTGGGATTGCAGTTGGAATGTCAACATCTATTCCTTCCCATAATCTCTCTGAATTAATTGCGGGTTTAATCATGTTAATTGATAATCCTCAATGCACTTTTCAAGAATTATTAACTGTAATTAAAGGACCTGATTTTCCAACAGGAGCTAACATTATCTACACAAAAGGAATTGAAAGCTACTTTGAAACAGGTAAAGGCAATGTAGTAATTCGTTCTAAAGTTGAGATAGAACAATTGCAAACAAGAAGTGCATTAGTTGTAACTGAAATTCCTTACATGGTTAACAAAACTACCTTAATTGAAAAGATTGTAGAACTTGTTAAAGCTGAAGAGATTTCAGGAATTGCTGATATCCGTGATGAATCCTCTCGAGAAGGAATAAGGTTAGTGATTGAAGTAAAACGCGACACTGTACCTGAAGTTTTATTAAATCAACTTTTTAAATCAACAAGATTACAAGTACGCTTCCCTGTTAATATGCTTGCTTTAGTTAAAGGAGCTCCTGTACTTCTCAACATGAAACAAGCTTTGGAAGTATATCTTGATCATCAAATTGATGTTCTTGTTAGAAAAACAAAGTTTGTGCTTAATAAACAACAAGAACGTTATCACATTTTAAGCGGACTTTTAATTGCTGCTTTAAATATTGATGAGGTTGTTGCAATTATTAAAAAATCAGCAAATAACCAGGAAGCAATTAATACATTAAATACAAAGTTTAAGCTTGATGAAATTCAAGCTAAAGCAGTTCTTGACATGCGTTTAAGGAGCTTAAGCGTACTTGAAGTTAACAAACTTCAAACTGAACAAAAAGAGTTAAAAGATTCAATTGAATTTTGTAAGAAAGTGTTAGCTGATCAAAAATTACAGCTAAAAATAATCAAAGAGGAATTGCAAAAAATCAATGATCAGTTTGGTGATGAAAGAAGAAGTGAAATTCTCTATGATATCTCTGAGGAAATTGATGATGAATCATTGATAAAAGTTGAGAATGTAGTGATAACTATGTCTACAAATGGTTATCTAAAAAGGATTGGAGTTGATGCTTATAATCTTCAACATCGTGGTGGAGTTGGGGTTAAAGGGCTAACTACTTATGTTGATGATAGTATTAGTCAATTATTGGTCTGTTCAACTCACTCTGACTTATTATTTTTTACTGATAAGGGTAAGGTTTATAGAATTAGAGCTCATCAAATTCCCTATGGTTTTAGAACAAATAAAGGTATTCCCGCTGTTAACTTAATCAAAATTGAAAAGGATGAAAGAATTTGTTCATTGTTATCTGTTAATAACTATGATGATGGTTATTTCTTTTTCTGTACTAAAAATGGAATTGTTAAAAGAACGAGCTTGAATGAATTCATCAACATCTTAAGTAATGGTAAGCGGGCTATATCTTTTGATGATAATGACACTTTGTATTCAGTAATTAAAACCCACGGA
AATGATGAGATTTTTATTGGTTCTACCAATGGATTTGTTGTTCGCTTCCATGAAAATCAACTCAGAGTTCTTTCAAGAACAGCAAGAGGTGTATTTGGTATCAGTTTAAATAAAGGAGAATTTGTTAATGGACTATCAACTTCAAGCAACGGTAGCTTACTTTTATCAGTCGGTCAAAATGGAATAGGTAAATTAACGAGCATAGATAAATATAGACTCACAAAACGTAATGCTAAGGGAGTTAAAACTCTAAGGGTTACTGATAGAACAGGCCCTGTTGTTACAACAACCACTGTTTTTGGTAATGAGGATCTTTTAATGATTTCCTCTGCTGGTAAAATTGTGCGTACCAGTTTACAAGAACTTTCAGAACAAGGTAAAAACACTTCTGGTGTTAAGTTAATTAGATTAAAAGATAATGAACGTTTAGAAAGAGTAACTATCTTTAAAGAAGAGTTAGAAGACAAAGAAATGCAACTAGAAGATGTTGGATCCAAACAAATTACGCAATAA
820
- .........
821
- Partial_Match_Genes:
822
- Gene:9923_11251_+_ATG_TAA
823
- ATGAAAAGCGAAATTAATATTTTTGCACTAGCAACTGCACCTTTTAATAGTGCATTACATATTATTAGGTTTTCTGGTCCTGATGTTTATGAGATTTTAAACAAGATAACTAATAAAAAAATAACAAGAAAAGGGATGCAAATTCAACGCACATGGATAGTTGATGAAAACAATAAGCGAATTGATGATGTGCTATTATTTAAATTTGTCTCTCCAAATTCTTATACAGGAGAAGATTTAATTGAAATTTCTTGTCATGGTAACATGTTGATCGTTAATGAAATTTGCGCACTTCTTTTAAAAAAAGGAGGTGTTTATGCCAAACCTGGTGAATTTACCCAAAGGAGTTTTTTAAATGGAAAAATGAGTTTACAACAAGCTAGTGCTGTAAATAAATTGATTTTATCTCCTAACTTATTAGTTAAAGATATAGTCTTAAATAATTTAGCGGGTGAAATGGATCAACAATTAGAACAAATAGCTCAACAAGTTAATCAATTAGTAATGCAAATGGAAGTAAACATTGATTATCCAGAATATCTTGATGAACAAGTAGAACTATCAACTTTAAATAATAAAGTTAAATTGATTATTGAAAAGCTTAAAAGAATTATTGAAAATAGTAAACAACTCAAAAAACTTCACGATCCTTTTAAAATTGCCATTATAGGCGAAACTAATGTAGGTAAATCTTCTTTACTCAACGCTTTATTAAATCAAGATAAAGCGATAGTTTCAAATATTAAAGGTAGTACACGCGATGTTGTTGAAGGGGATTTCAATTTAAATGGTTATTTAATCAAGATCTTAGATACTGCAGGTATCCGTAAACATAAAAGTGGGCTTGAAAAAGCAGGAATTAAAAAAAGCTTTGAATCTATAAAGCAAGCTAATTTGGTTATTTATCTTTTAGATGCAACACATCCAAAGAAAGATCTTGAATTAATTAGTTTTTTTAAGAAAAATAAAAAGGATTTTTTTGTTTTCTATAACAAAAAAGATTTAATTACAAATAAGTTTGAAAATAGTATTTCTGCAAAGCAAAAAGATATTAAAGAATTAGTTGATTTATTAACTAAATATATTAACGAGTTTTATAAAAAAATAGATCAAAAAATCTATCTGATTGAAAATTGACAGCAAATTTTAATTGAAAAAATTAAAGAACAATTAGAACAGTTTTTAAAGCAACAAAAAAAATATTTATTTTTCGATGTTTTAGTTACCCATCTAAGAGAAGCTCAACAAGATATTCTTAAACTACTAGGTAAGGATGTAGGTTTTGATTTAGTTAATGAAATTTTTAATAATTTTTGTTTAGGAAAATAA
824
- ORF:9923_11059_+_ATG_TGA
825
- ATGAAAAGCGAAATTAATATTTTTGCACTAGCAACTGCACCTTTTAATAGTGCATTACATATTATTAGGTTTTCTGGTCCTGATGTTTATGAGATTTTAAACAAGATAACTAATAAAAAAATAACAAGAAAAGGGATGCAAATTCAACGCACATGGATAGTTGATGAAAACAATAAGCGAATTGATGATGTGCTATTATTTAAATTTGTCTCTCCAAATTCTTATACAGGAGAAGATTTAATTGAAATTTCTTGTCATGGTAACATGTTGATCGTTAATGAAATTTGCGCACTTCTTTTAAAAAAAGGAGGTGTTTATGCCAAACCTGGTGAATTTACCCAAAGGAGTTTTTTAAATGGAAAAATGAGTTTACAACAAGCTAGTGCTGTAAATAAATTGATTTTATCTCCTAACTTATTAGTTAAAGATATAGTCTTAAATAATTTAGCGGGTGAAATGGATCAACAATTAGAACAAATAGCTCAACAAGTTAATCAATTAGTAATGCAAATGGAAGTAAACATTGATTATCCAGAATATCTTGATGAACAAGTAGAACTATCAACTTTAAATAATAAAGTTAAATTGATTATTGAAAAGCTTAAAAGAATTATTGAAAATAGTAAACAACTCAAAAAACTTCACGATCCTTTTAAAATTGCCATTATAGGCGAAACTAATGTAGGTAAATCTTCTTTACTCAACGCTTTATTAAATCAAGATAAAGCGATAGTTTCAAATATTAAAGGTAGTACACGCGATGTTGTTGAAGGGGATTTCAATTTAAATGGTTATTTAATCAAGATCTTAGATACTGCAGGTATCCGTAAACATAAAAGTGGGCTTGAAAAAGCAGGAATTAAAAAAAGCTTTGAATCTATAAAGCAAGCTAATTTGGTTATTTATCTTTTAGATGCAACACATCCAAAGAAAGATCTTGAATTAATTAGTTTTTTTAAGAAAAATAAAAAGGATTTTTTTGTTTTCTATAACAAAAAAGATTTAATTACAAATAAGTTTGAAAATAGTATTTCTGCAAAGCAAAAAGATATTAAAGAATTAGTTGATTTATTAACTAAATATATTAACGAGTTTTATAAAAAAATAGATCAAAAAATCTATCTGATTGAAAATTGA
826
-
827
- Gene:11251_12039_+_ATG_TAA
828
- ATGGAATACTTTGATGCACATTGTCATTTAAATTGTGAACCTTTACTGAGTGAAATTGAAAAAAGCATCGCTAATTTCAAATTAATTAATTTAAAAGCAAATGTTGTAGGTACAGATTTGGATAATTCTAAAATTGCTGTTGAATTAGCTAAAAAATATCCTGATCTTTTAAAAGCAACCATAGGTATCCATCCAAATGATGTTCATTTAGTTGATTTTAAAAAGACAAAAAAACAACTTAATGAACTATTAATAAATAACAGAAATTTCATAAGTTGTATTGGTGAATATGGTTTTGATTATCACTACACAACAGAATTTATTGAATTGCAAAACAAATTCTTTGAGATGCAATTTGAAATAGCTGAAACTAATAAATTGGTTCACATGCTTCATATTCGTGATGCTCATGAAAAAATTTATGAAATATTAACAAGATTAAAGCCAACTCAACCTGTGATTTTTCATTGTTTCAGTCAAGATATAAATATTGCTAAAAAGCTACTATCATTAAAAGATTTAAATATTGACATCTTCTTTTCTATCCCAGGGATAGTTACTTTTAAGAATGCTCAAGCATTACATGAAGCTTTAAAGATTATTCCTAGTGAATTACTTTTAAGTGAAACTGACTCACCGTGATTAACCCCTTCTCCTTTTCGAGGCAAAGTTAACTGACCTGAATATGTAGTTCATACTGTTAGCACTGTTGCTGAAATAAAAAAAATAGAAATTGCTGAAATGAAGCGAATTATTGTTAAAAATGCAAAAAAATTATTTTGACATTAA
829
- ORF:11251_11892_+_ATG_TGA
830
- ATGGAATACTTTGATGCACATTGTCATTTAAATTGTGAACCTTTACTGAGTGAAATTGAAAAAAGCATCGCTAATTTCAAATTAATTAATTTAAAAGCAAATGTTGTAGGTACAGATTTGGATAATTCTAAAATTGCTGTTGAATTAGCTAAAAAATATCCTGATCTTTTAAAAGCAACCATAGGTATCCATCCAAATGATGTTCATTTAGTTGATTTTAAAAAGACAAAAAAACAACTTAATGAACTATTAATAAATAACAGAAATTTCATAAGTTGTATTGGTGAATATGGTTTTGATTATCACTACACAACAGAATTTATTGAATTGCAAAACAAATTCTTTGAGATGCAATTTGAAATAGCTGAAACTAATAAATTGGTTCACATGCTTCATATTCGTGATGCTCATGAAAAAATTTATGAAATATTAACAAGATTAAAGCCAACTCAACCTGTGATTTTTCATTGTTTCAGTCAAGATATAAATATTGCTAAAAAGCTACTATCATTAAAAGATTTAAATATTGACATCTTCTTTTCTATCCCAGGGATAGTTACTTTTAAGAATGCTCAAGCATTACATGAAGCTTTAAAGATTATTCCTAGTGAATTACTTTTAAGTGAAACTGACTCACCGTGA
831
- .......
832
- Missed_Genes:
833
- >Myco_1828_2760_+
834
- ATGAATCTTTACGATCTTTTAGAACTACCAACTACAGCATCAATAAAAGAAATAAAAATTGCTTATAAAAGATTAGCAAAGCGTTATCACCCTGATGTAAATAAATTAGGTTCGCAAACTTTTGTTGAAATTAATAATGCTTATTCAATATTAAGTGATCCTAACCAAAAGGAAAAATATGATTCAATGCTGAAAGTTAATGATTTTCAAAATCGCATCAAAAATTTAGATATTAGTGTTAGATGACATGAAAATTTCATGGAAGAACTCGAACTTCGTAAGAACTGAGAATTTGATTTTTTTTCATCTGATGAAGATTTCTTTTATTCTCCATTTACAAAAAACAAATATGCTTCCTTTTTAGATAAAGATGTTTCTTTAGCTTTTTTTCAGCTTTACAGCAAGGGCAAAATAGATCATCAATTGGAAAAATCTTTATTGAAAAGAAGAGATGTAAAAGAAGCTTGTCAACAGAATAAAAATTTTATTGAAGTTATAAAAGAGCAATATAACTATTTTGGTTGAATTGAAGCTAAGCGTTATTTCAATATTAATGTTGAACTTGAGCTCACACAGAGAGAGATAAGAGATAGAGATGTTGTTAACCTACCTTTAAAAATTAAAGTTATTAATAATGATTTTCCAAATCAACTCTGATATGAAATTTATAAAAACTATTCATTTCGCTTATCTTGAGATATAAAAAATGGTGAAATTGCTGAATTTTTCAATAAAGGTAATAGAGCTTTAGGATGAAAAGGTGACTTAATTGTCAGAATGAAAGTAGTTAATAAAGTAAACAAAAGACTGCGTATTTTTTCAAGCTTTTTTGAGAACGATAAATCTAAATTATGGTTCCTTGTTCCAAACGATAAACAAAGTAATCCTAATAAGGGCGTTTTTAACTATAAAACTCAGCACTTTATTGATTAA
835
-
836
- >Myco_2845_4797_+
837
- ATGGAAGAAAATAACAAAGCAAATATCTATGACTCTAGTAGCATTAAGGTCCTTGAAGGACTTGAGGCTGTTAGAAAACGCCCTGGAATGTACATTGGTTCTACTGGCGAAGAAGGTTTGCATCACATGATCTGAGAGATAGTAGACAACTCAATTGATGAAGCAATGGGAGGTTTTGCCAGTTTTGTTAAGCTTACCCTTGAAGATAATTTTGTTACCCGTGTAGAGGATGATGGAAGAGGGATACCTGTTGATATCCATCCTAAGACTAATCGTTCTACAGTTGAAACAGTTTTTACAGTTCTACACGCTGGCGGTAAATTTGATAACGATAGCTATAAAGTGTCAGGTGGTTTACACGGTGTTGGTGCATCAGTTGTTAATGCGCTTAGTTCTTCTTTTAAAGTTTGAGTTTTTCGTCAAAATAAAAAGTATTTTCTCAGCTTTAGCGATGGAGGAAAGGTAATTGGAGATTTGGTCCAAGAAGGTAACTCTGAAAAAGAGCATGGAACAATTGTTGAGTTTGTTCCTGATTTCTCTGTAATGGAAAAGAGTGATTACAAACAAACTGTAATTGTAAGCAGACTCCAGCAATTAGCTTTTTTAAACAAGGGAATAAGAATTGACTTTGTTGATAATCGTAAACAAAACCCACAGTCTTTTTCTTGAAAATATGATGGGGGATTGGTTGAATATATCCACCACCTAAACAACGAAAAAGAACCACTTTTTAATGAAGTTATTGCTGATGAAAAAACTGAAACTGTAAAAGCTGTTAATCGTGATGAAAACTACACAGTAAAGGTTGAAGTTGCTTTTCAATATAACAAAACATACAACCAATCAATTTTCAGTTTTTGTAACAACATTAATACTACAGAAGGTGGAACCCATGTGGAAGGTTTTCGTAATGCACTTGTTAAGATCATTAATCGCTTTGCTGTTGAAAATAAATTCCTAAAAGATAGTGATGAAAAGATTAACCGTGATGATGTTTGTGAAGGATTAACTGCTATTATTTCCATTAAACACCCAAACCCACAATATGAAGGACAAACTAAAAAGAAGTTAGGTAATACTGAGGTAAGACCTTTAGTTAATAGTGTTGTTAGTGAAATCTTTGAACGCTTCATGTTAGAAAACCCACAAGAAGCAAACGCTATCATCAGAAAAACACTTTTAGCTCAAGAAGCGAGAAGAAGAAGTCAAGAGGCTAGGGAGTTAACTCGTCGTAAATCACCTTTTGATAGTGGTTCATTACCAGGTAAATTAGCTGATTGTACAACCAGAGATCCTTCGATTAGTGAACTTTACATTGTTGAGGGTGATAGTGCTGGTGGCACTGCTAAAACAGGAAGAGATCGTTATTTTCAAGCTATCTTACCCTTAAGAGGAAAGATTTTAAACGTTGAAAAATCTAACTTTGAACAAATCTTTAATAATGCAGAAATTTCTGCATTAGTGATGGCAATAGGCTGTGGGATTAAACCTGATTTTGAACTTGAAAAACTTAGATATAGCAAGATTGTGATCATGACAGATGCTGATGTTGATGGTGCACACATAAGAACACTTCTCTTAACTTTCTTTTTTCGCTTTATGTATCCTTTGGTTGAACAAGGCAATATTTTTATTGCTCAACCCCCACTTTATAAAGTGTCATATTCCCATAAGGATTTATACATGCACACTGATGTTCAACTTGAACAGTGAAAAAGTCAAAACCCTAACGTAAAGTTTGGGTTACAAAGATATAAAGGACTTGGAGAAATGGATGCATTGCAGCTGTGAGAAACAACAATGGATCCTAAGGTTAGAACATTGTTAAAAGTTACTGTTGAAGATGCTTCTATTGCTGATAAAGCTTTTTCACTGTTGATGGGTGATGAAGTTCCCCCAAGAAGAGAATTTATTGAAAAAAATGCTCGTAGTGTTAAAAACATTGATATTTAA
838
-
839
- >Myco_7294_8547_+
840
- ATGTTGGATCCAAACAAATTACGCAATAACTATGATTTCTTTAAAAAGAAACTGTTAGAAAGAAATGTAAATGAGCAATTATTAAATCAGTTTATTCAAACTGATAAACTAATGCGCAAAAACTTGCAACAACTTGAACTTGCTAACCAAAAACAAAGCTTGTTGGCAAAACAAGTTGCTAAGCAAAAAGATAATAAAAAGCTATTAGCTGAATCAAAAGAACTTAAGCAGAAGATTGAAAACTTAAATAATGCTTATAAAGATTCACAAAACATTAGTCAAGATTTACTTCTAAATTTTCCTAATATTGCTCATGAATCAGTTCCTGTTGGTAAAAATGAATCAGCAAACTTAGAACTTCTTAAAGAAGGGAGAAAACCAGTTTTTGATTTCAAACCTTTACCACATCGAGAGTTATGTGAAAAGTTAAATTTAGTTGCTTTTGATAAAGCTACTAAGATTAGTGGAACTAGGTTTGTTGCATATACAGATAAAGCAGCTAAACTACTTAGAGCGATAACTAATCTAATGATTGACCTTAATAAAAGCAAGTATCAAGAATGAAACCTGCCAGTTGTTATTAATGAATTAAGTTTAAGATCAACCGGACAACTACCTAAGTTTAAAGATGATGTTTTTAAACTAGAAAACACCCGTTATTATCTTTCTCCAACTTTAGAGGTACAACTTATCAATTTACATGCTAATGAAATTTTTAATGAAGAAGATTTACCTAAATACTACACTGCAACAGGTATTAACTTTCGTCAAGAAGCGGGTAGTGCTGGTAAACAAACCAAAGGAACTATTAGATTGCATCAGTTTCAAAAAACTGAGTTAGTTAAGTTTTGTAAACCTGAAAATGCTATCAATGAATTGGAAGCAATGGTTAGAGATGCTGAACAAATCTTAAAGGCACTTAAGTTACCTTTTAGAAGGTTATTGTTATGTACTGGTGATATGGGCTTTAGTGCTGAAAAAACATATGATCTTGAAGTTTGAATGGCAGCTAGCAATGAATATCGTGAAGTTTCTTCTTGTTCATCTTGTGGTGATTTTCAAGCAAGAAGAGCTATGATTCGTTACAAAGATATTAACAACGGTAAAAACAGTTATGTTGCTACTTTAAATGGAACAGCATTATCTATTGATAGAATTTTTGCTGCAATTCTAGAAAATTTTCAAACAAAAGATGGCAAAATTCTTATCCCACAAGCATTAAAAAAATACCTTGATTTTGACACAATCAAGTAA
841
- ......
842
-
843
- ORFs_Without_Corresponding_Gene_In_Reference_Metrics:
844
- ATG_Start ,GTG_Start ,TTG_Start ,ATT_Start ,CTG_Start ,Alternative_Start_Codon ,TGA_Stop ,TAA_Stop ,TAG_Stop ,Alternative_Stop_Codon ,Median_Length ,ORFs_on_Positive_Strand ,ORFs_on_Negative_Strand
845
- 58.39,17.14,24.47,0.00,0.00,0.00,71.55,20.62,7.83,0.00,287.00,449,356
846
- ORF_Without_Corresponding_Gene_in_Reference:
847
- >Prodigal_1828_2073_+
848
- ATGAATCTTTACGATCTTTTAGAACTACCAACTACAGCATCAATAAAAGAAATAAAAATTGCTTATAAAAGATTAGCAAAGCGTTATCACCCTGATGTAAATAAATTAGGTTCGCAAACTTTTGTTGAAATTAATAATGCTTATTCAATATTAAGTGATCCTAACCAAAAGGAAAAATATGATTCAATGCTGAAAGTTAATGATTTTCAAAATCGCATCAAAAATTTAGATATTAGTGTTAGATGA
849
- >Prodigal_2605_2760_+
850
- ATGAAAGTAGTTAATAAAGTAAACAAAAGACTGCGTATTTTTTCAAGCTTTTTTGAGAACGATAAATCTAAATTATGGTTCCTTGTTCCAAACGATAAACAAAGTAATCCTAATAAGGGCGTTTTTAACTATAAAACTCAGCACTTTATTGATTAA
851
- >Prodigal_2845_2979_+
852
- ATGGAAGAAAATAACAAAGCAAATATCTATGACTCTAGTAGCATTAAGGTCCTTGAAGGACTTGAGGCTGTTAGAAAACGCCCTGGAATGTACATTGGTTCTACTGGCGAAGAAGGTTTGCATCACATGATCTGA
853
- >Prodigal_3010_3255_+
854
- ATGGGAGGTTTTGCCAGTTTTGTTAAGCTTACCCTTGAAGATAATTTTGTTACCCGTGTAGAGGATGATGGAAGAGGGATACCTGTTGATATCCATCCTAAGACTAATCGTTCTACAGTTGAAACAGTTTTTACAGTTCTACACGCTGGCGGTAAATTTGATAACGATAGCTATAAAGTGTCAGGTGGTTTACACGGTGTTGGTGCATCAGTTGTTAATGCGCTTAGTTCTTCTTTTAAAGTTTGA
855
- >Prodigal_3319_3513_+
856
- TTGGTCCAAGAAGGTAACTCTGAAAAAGAGCATGGAACAATTGTTGAGTTTGTTCCTGATTTCTCTGTAATGGAAAAGAGTGATTACAAACAAACTGTAATTGTAAGCAGACTCCAGCAATTAGCTTTTTTAAACAAGGGAATAAGAATTGACTTTGTTGATAATCGTAAACAAAACCCACAGTCTTTTTCTTGA
857
- >Prodigal_3529_4557_+
858
- TTGGTTGAATATATCCACCACCTAAACAACGAAAAAGAACCACTTTTTAATGAAGTTATTGCTGATGAAAAAACTGAAACTGTAAAAGCTGTTAATCGTGATGAAAACTACACAGTAAAGGTTGAAGTTGCTTTTCAATATAACAAAACATACAACCAATCAATTTTCAGTTTTTGTAACAACATTAATACTACAGAAGGTGGAACCCATGTGGAAGGTTTTCGTAATGCACTTGTTAAGATCATTAATCGCTTTGCTGTTGAAAATAAATTCCTAAAAGATAGTGATGAAAAGATTAACCGTGATGATGTTTGTGAAGGATTAACTGCTATTATTTCCATTAAACACCCAAACCCACAATATGAAGGACAAACTAAAAAGAAGTTAGGTAATACTGAGGTAAGACCTTTAGTTAATAGTGTTGTTAGTGAAATCTTTGAACGCTTCATGTTAGAAAACCCACAAGAAGCAAACGCTATCATCAGAAAAACACTTTTAGCTCAAGAAGCGAGAAGAAGAAGTCAAGAGGCTAGGGAGTTAACTCGTCGTAAATCACCTTTTGATAGTGGTTCATTACCAGGTAAATTAGCTGATTGTACAACCAGAGATCCTTCGATTAGTGAACTTTACATTGTTGAGGGTGATAGTGCTGGTGGCACTGCTAAAACAGGAAGAGATCGTTATTTTCAAGCTATCTTACCCTTAAGAGGAAAGATTTTAAACGTTGAAAAATCTAACTTTGAACAAATCTTTAATAATGCAGAAATTTCTGCATTAGTGATGGCAATAGGCTGTGGGATTAAACCTGATTTTGAACTTGAAAAACTTAGATATAGCAAGATTGTGATCATGACAGATGCTGATGTTGATGGTGCACACATAAGAACACTTCTCTTAACTTTCTTTTTTCGCTTTATGTATCCTTTGGTTGAACAAGGCAATATTTTTATTGCTCAACCCCCACTTTATAAAGTGTCATATTCCCATAAGGATTTATACATGCACACTGATGTTCAACTTGAACAGTGA
859
- ....
860
- ORFs_Which_Detected_more_than_one_Gene:
797
+ 47,2.17,46,97.87,46,100.00,378.0,1.20,138,0.00,1551,0.00,54.79,-0.05,54.81,0.00,36,63.64,61,0.00,22,0.00,61,3.00,13,0.00,13,0.00,45,97.83,45,97.83,46,100.00,0,0,0.00,1,2.17,0,0.00,11,0.23,36,0.77,6.0,N/A,82.98,12.77,2.13,0.00,0.00,2.13,6.38,23.40,68.09,2.13,1.00,0.02,0.00,0.98,1.00,0.02,1.00,0.16,0.84,0.00,0.96,1.00,0.04,81.98,78.63
798
+ Reference_CDS_Gene_Coverage_of_Genome
799
+ 78.61
800
+ Predicted_CDS_Coverage_of_Genome
801
+ 81.98
802
+ Matched_Predicted_CDS_Coverage_of_Genome
803
+ 78.63
861
804
 
862
805
  ```
863
806
 
864
807
 
865
- ## GFF Tools:
866
-
808
+ ## GFF/Annotation Manipulation Tools: ORForise also provides tools to manipulate and combine existing annotations in GFF format or other tool-specific formats.
867
809
  ### GFF-Adder:
868
-
869
- GFF-Adder allows for the addition of predicted CDSs to an existing reference annotation (GFF or another tool) which produces a new GFF containing the original
870
- genes plus the new CDS from another prediction. Default filtering will remove additional CDSs that overlap existing genes by more than 50 nt.
871
- The ```-gi``` option can be used to allow for different genomic elements to be accounted for, other than only CDSs in the reference annotation.
872
-
810
+ GFF-Adder combines two existing annotations (GFF or other tool formats).
873
811
  For Help: ```GFF-Adder -h ```
874
812
 
875
813
  ```python
876
- ORForise v1.6.0: GFF-Adder Run Parameters.
814
+ ORForise v1.6.1: GFF-Adder Run Parameters.
877
815
 
878
816
  Required Arguments:
879
817
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
880
818
  -ref REFERENCE_ANNOTATION
881
819
  Which reference annotation file to use as reference?
882
- -at ADDITIONAL_TOOL Which format to use for additional annotation?
820
+ -at ADDITIONAL_TOOL Which format to use for additional annotation? - Can provide multiple annotations (Tool1,Tool2)
883
821
  -add ADDITIONAL_ANNOTATION
884
- Which annotation file to add to reference annotation?
822
+ Which annotation file to add to reference annotation? - Can provide multiple annotations (1.GFF,2.GFF)
885
823
  -o OUTPUT_FILE Output filename
886
824
 
887
825
  Optional Arguments:
888
- -rt REFERENCE_TOOL Which tool format to use as reference? - If not provided, will default to standard Ensembl
889
- GFF format, can be Prodigal or any of the other tools available
890
- -gi GENE_IDENT Identifier used for extraction of "genic" regions from reference annotation "CDS,rRNA,tRNA":
891
- Default for is "CDS"
892
- -gene_ident GENE_IDENT
826
+ -rt REFERENCE_TOOL Which tool format to use as reference? - If not provided, will default to the standard GFF format and will only look for "CDS" features
827
+ --gene_ident GENE_IDENT
893
828
  Identifier used for identifying genomic features in reference annotation "CDS,rRNA,tRNA"
894
- -olap OVERLAP Maximum overlap between reference and additional genic regions (CDS,rRNA etc) - Default: 50
895
- nt
829
+ -mc Default - False: Mark reference annotations which where present in the additional tool annotation
830
+ -c Default - False: Do not mark 9th column with "Original/Matched/Additional tag"
831
+ --meta Default - False: Output metadata file
832
+ --olap OVERLAP Maximum overlap between reference and additional genic regions (CDS,rRNA etc) - Default: 50 nt
833
+
834
+ Misc:
835
+ -v {True,False} Default - False: Print out runtime status
836
+
837
+
896
838
  ```
897
839
 
898
840
  #### Example: Running GFF-Adder to combine the additional CDS predictions made by Prodigal with the canonical annotations from Ensembl.
899
- ``` GFF-Adder -dna ~/Testing/Myco.fa -ref ~/Testing/Myco.gff -at Prodigal -add ~/Testing/Prodigal_Myco.gff -o ~/Testing/Myco_Ensembl_GFF_Adder_Prodigal.gff ```
841
+ ``` GFF-Adder -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -at Prodigal -add ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff -o ~/Test_Data/Genomes/E-coli/Ensembl_AND_Prodigal_Escherichia_coli.gff ```
900
842
  #### Example Output: ~/Test_Data/Genomes/E-coli/Ensembl_AND_Prodigal_Escherichia_coli.gff
901
843
  ```
902
844
  ##gff-version 3
903
845
  # GFF-Adder
904
- # Run Date:2021-11-10
905
- ##Genome DNA File:./Testing/Myco.fa
906
- ##Original File: ./Testing/Myco.gff
907
- ##Additional File: ./Testing/Prodigal_Myco.gff
908
- .......
909
- Chromosome Reference_Annotation CDS 68522 70225 . - . ID=Original_Annotation
910
- Chromosome Reference_Annotation CDS 70530 72572 . + . ID=Original_Annotation
911
- Chromosome Reference_Annotation CDS 72523 73434 . + . ID=Original_Annotation
912
- Chromosome Prodigal CDS 73445 73648 . + . ID=Additional_Annotation
913
- Chromosome Reference_Annotation CDS 73690 77685 . + . ID=Original_Annotation
914
- Chromosome Reference_Annotation CDS 77685 79085 . + . ID=Original_Annotation
915
- Chromosome Reference_Annotation CDS 79089 81035 . + . ID=Original_Annotation
916
- Chromosome Reference_Annotation CDS 81046 82596 . + . ID=Original_Annotation
917
- Chromosome Reference_Annotation CDS 82620 84044 . + . ID=Original_Annotation
918
- Chromosome Prodigal CDS 84082 84312 . + . ID=Additional_Annotation
919
- Chromosome Prodigal CDS 84532 84744 . - . ID=Additional_Annotation
920
- Chromosome Prodigal CDS 84776 85051 . + . ID=Additional_Annotation
846
+ # Run Date:2026-01-11
847
+ ##Genome DNA File:../../Test_Data/Genomes/E-coli/Escherichia_coli.fasta
848
+ ##Original File: ../../Test_Data/Genomes/E-coli/Escherichia_coli.gff
849
+ ##Additional File: ../../Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff
850
+ ERS715463SCcontig000003 Prodigal CDS 2 388 . + . ID=Additional_Annotations;Prodigal
851
+ ERS715463SCcontig000003 MGnify CDS 83 388 . + . ID=Original_Annotation;ID=ENSB_0kRwXBh8bjHtVl3;Parent=transcript:ENSB:0kRwXBh8bjHtVl3;protein_id=ENSB:0kRwXBh8bjHtVl3
852
+ ERS715463SCcontig000003 MGnify CDS 453 542 . + . ID=Original_Annotation;ID=ENSB_W8Go0tx9y9dAtng;Parent=transcript:ENSB:W8Go0tx9y9dAtng;protein_id=ENSB:W8Go0tx9y9dAtng;Matched_Annotations=Prodigal
921
853
  ```
922
854
 
923
- ### GFF-Intersector:
855
+ ### Annotation-Intersector:
924
856
 
925
- GFF-Intersector enables the aggregation of different genome annotations and CDS predictions and creates a single GFF
926
- representing the intersection of the two existing annotations.
927
- GFF-Intersector also provides an option to allow the retention of genes that have a user defined difference (minimum % coverage and in-frame).
928
- The ```-gi``` option can be used to allow for different genomic elements to be accounted for, other than only CDSs in the reference annotation.
857
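+ Annotation-Intersector combines and contracts two existing annotations (GFF or other tool formats), keeping only the reference genes that are also found in the additional annotation at or above the coverage threshold.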
929
858
 
930
- For Help: ```GFF-Intersector -h ```
859
+ For Help: ```Annotation-Intersector -h ```
931
860
  ```python
932
- ORForise v1.6.0: GFF-Intersector Run Parameters.
861
+ Thank you for using ORForise
862
+ Please report any issues to: https://github.com/NickJD/ORForise/issues
863
+ #####
864
+ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
865
+ ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
866
+ OUTPUT_FILE [-dna GENOME_DNA]
867
+ [-rt REFERENCE_TOOL] [-gi GENE_IDENT]
868
+ [-cov COVERAGE] [--report-discordance]
869
+ [--report-discordance-file REPORT_DISCORDANCE_FILE]
870
+
871
+ ORForise v1.6.1: Annotation-Intersector Run Parameters
872
+
873
+ options:
874
+ -h, --help show this help message and exit
933
875
 
934
876
  Required Arguments:
935
- -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
936
877
  -ref REFERENCE_ANNOTATION
937
- Which reference annotation file to use as reference?
938
- -at ADDITIONAL_TOOL Which format to use for additional annotation?
878
+ Reference annotation GFF file
879
+ -at ADDITIONAL_TOOL Tool name/format for additional annotation (module
880
+ under Tools/)
939
881
  -add ADDITIONAL_ANNOTATION
940
- Which annotation file to add to reference annotation?
941
- -o OUTPUT_FILE Output filename
882
+ Additional annotation file to compare
883
+ -o OUTPUT_FILE Output GFF filename for kept genes
942
884
 
943
885
  Optional Arguments:
944
- -rt REFERENCE_TOOL Which tool format to use as reference? - If not provided, will default to standard Ensembl
945
- GFF format, can be Prodigal or any of the other tools available
946
- -gi GENE_IDENT Identifier used for extraction of "genic" regions from reference annotation "CDS,rRNA,tRNA":
947
- Default for is "CDS"
948
- -cov COVERAGE Percentage coverage of reference annotation needed to confirm intersection - Default: 100 ==
949
- exact match
886
+ -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based
887
+ on
888
+ -rt REFERENCE_TOOL Reference tool parser name (if not provided, GFF is
889
+ expected)
890
+ -gi GENE_IDENT Comma-separated feature types to consider from
891
+ reference (default: CDS)
892
+ -cov COVERAGE, --coverage COVERAGE
893
+ Percentage coverage threshold for intersection
894
+ (default 100)
895
+ --report-discordance If set, produce discordance reports (three GFFs)
896
+ --report-discordance-file REPORT_DISCORDANCE_FILE
897
+ Optional base path for discordance reports
898
+
950
899
  ```
951
900
 
952
- #### Example: Running GFF-Intersector to combine the additional CDS predictions made by Prodial to the canonical annotations from Ensembl.
953
- ``` GFF-Intersector -dna ~/Testing/Myco.fa -ref ~/Testing/Myco.gff -at Prodigal -add ~/Testing/Prodigal_Myco.gff -o ~/Testing/Myco_Ensembl_GFF_Intersector_Prodigal.gff```
901
+ #### Example: Running Annotation-Intersector to combine and contract annotations from multiple tools or reference files.
902
+ ``` Annotation-Intersector -ref .../ORForise/Tools/EasyGene/EasyGene_E-coli_E-coli.gff -rt EasyGene -at Prodigal -add .../ORForise/Tools/Prodigal/Prodigal_E-coli.gff -o .../Test_Data/Annotation-Intersector/Annotation-Intersect.gff --report-discordance ```
954
903
 
955
- #### Example Output: ~/Testing/Myco_Ensembl_GFF_Intersector_Prodigal.gff
904
+ #### Example Output:
905
+ ##### .../Test_Data/Annotation-Intersector/Annotation-Intersect.gff
906
+ ```
907
+ ##gff-version 3
908
+ # Annotation-Intersector
909
+ # Run Date:2026-01-09
910
+ ##Original File: .../ORForise/Tools/EasyGene/EasyGene_E-coli_E-coli.gff
911
+ ##Intersecting File: .../ORForise/Tools/Prodigal/Prodigal_E-coli.gff
912
+ Chromosome EasyGene CDS 337 2799 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
913
+ Chromosome EasyGene CDS 3734 5020 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
914
+ Chromosome EasyGene CDS 5683 6459 . - . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
915
+ Chromosome EasyGene CDS 6529 7959 . - . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
916
+ Chromosome EasyGene CDS 8238 9191 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
917
+ ```
918
+ ##### .../Test_Data/Annotation-Intersector/Annotation-Intersect.only_in_reference.gff
919
+ ```
920
+ ##gff-version 3
921
+ # Annotation-Intersector discordance report
922
+ # Run Date:2026-01-09
923
+ ##Original File: EasyGene_E-coli_E-coli
924
+ Chromosome EasyGene CDS 408401 408484 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
925
+ Chromosome EasyGene CDS 1272584 1272886 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
926
+ Chromosome EasyGene CDS 2574901 2574960 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
927
+ Chromosome EasyGene CDS 2710019 2710081 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
928
+ ```
929
+ ##### .../Test_Data/Annotation-Intersector/Annotation-Intersect.mismatches.gff
956
930
  ```
957
931
  ##gff-version 3
958
- # GFF-Intersector
959
- # Run Date:2021-11-10
960
- ##Genome DNA File:./Testing/Myco.fa
961
- ##Original File: ./Testing/Myco.gff
962
- ##Intersecting File: ./Testing/Prodigal_Myco.gff
963
- Chromosome original CDS 686 1828 . + . ID=Original_Annotation;Coverage=100
964
- Chromosome original CDS 4812 7322 . + . ID=Original_Annotation;Coverage=100
965
- Chromosome original CDS 8551 9183 . + . ID=Original_Annotation;Coverage=100
966
- Chromosome original CDS 22389 23558 . + . ID=Original_Annotation;Coverage=100
967
- Chromosome original CDS 29552 30124 . + . ID=Original_Annotation;Coverage=100
968
- Chromosome original CDS 31705 32325 . - . ID=Original_Annotation;Coverage=100
969
- Chromosome original CDS 49376 49642 . + . ID=Original_Annotation;Coverage=100
970
- Chromosome original CDS 59082 59753 . + . ID=Original_Annotation;Coverage=100
971
- Chromosome original CDS 61014 61406 . + . ID=Original_Annotation;Coverage=100
972
- Chromosome original CDS 82620 84044 . + . ID=Original_Annotation;Coverage=100
932
+ # Annotation-Intersector discordance report
933
+ # Run Date:2026-01-09
934
+ ##Original File: EasyGene_E-coli_E-coli
935
+ Chromosome EasyGene CDS 18715 19620 . . . Status=found_in_additional_but_below_coverage;Coverage=99.34;Ref_info=EasyGene;Add_info=Prodigal
936
+ Chromosome EasyGene CDS 19811 20314 . . . Status=found_in_additional_but_below_coverage;Coverage=75.00;Ref_info=EasyGene;Add_info=Prodigal
937
+ Chromosome EasyGene CDS 29624 30799 . . . Status=found_in_additional_but_below_coverage;Coverage=97.70;Ref_info=EasyGene;Add_info=Prodigal
938
+ Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_coverage;Coverage=98.99;Ref_info=EasyGene;Add_info=Prodigal
939
+
940
+ ```
941
+
942
+ ### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
943
+ For Help: ```Convert_To_GFF.py -h ```
973
944
  ```
945
+ Thank you for using ORForise
946
+ Please report any issues to: https://github.com/NickJD/ORForise/issues
947
+ #####
948
+ usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
974
949
 
950
+ ORForise v1.6.1: Convert-To-GFF Run Parameters
951
+
952
+ Required Arguments:
953
+ -dna GENOME_DNA Genome DNA file (.fa)
954
+ -i INPUT_ANNOTATION Input annotation file (tabular)
955
+ -fmt FORMAT Input format: blast, abricate, genemark
956
+ -o OUTPUT_DIR Output directory
957
+
958
+ Optional Arguments:
959
+ -gi GENE_IDENT Gene identifier types to extract (unused)
960
+ --verbose Verbose logging with logfile
961
+ ```
975
962
 
976
963
  # Genomes Available:
977
964
 
@@ -984,17 +971,17 @@ The .fa and .gff files (from Ensembl Bacteria Release 46) below are available in
984
971
  * *Pseudomonas fluorescens* - Strain UK4 - Assembly ASM73042v1
985
972
  * *Staphylococcus aureus* - Strain 502A - Assembly ASM59796v1
986
973
 
987
- # Prediction Tools Available:
988
974
 
989
- There are two Groups of tools - Those which do require a pre-built model and those which do not. \
990
- For the example runs provided, each tool is listed with the non-default options used and their predictions for each of the 6 model organisms are available in their respective
991
- directories.
992
- ORForise only needs the tool name and the annotation file produced from any available model to undertake the analysis.
993
975
 
994
- ## GFF Standard Format:
976
+ # Prediction Tool Formats Currently Available:
977
+ ORForise currently supports the comparison of multiple gene prediction tools via their output in GFF3 format. \
978
+ This can be used to compare different annotations with each other or additional tools which use the GFF3 format.
979
+
980
+ ## Tool Specific Formats:
981
+ Run ```List-Tools``` to see the available tools. \
982
+ ORForise only needs the tool name and the annotation file produced from any compatible tool to undertake the analysis.
995
983
 
996
- The GFF Tool directory allows for the analysis of user-provided annotations in the standard GFF3 format. \
997
- This can be used to compare different cannonical annotations with eachother or additional tools which use the GFF3 format.
984
+ **If the tool uses another non-standard format, a request can be made to add it as an option via GitHub.**
998
985
 
999
986
  ## Model Based Tools:
1000
987
 
@@ -1018,16 +1005,16 @@ This tool has two comparisons with the organism models *E. coli - K12 - MG165* a
1018
1005
  **FragGeneScan - Version 1.3.0** - https://omics.informatics.indiana.edu/FragGeneScan/
1019
1006
  The 'complete' genome option was selected and GFF was chosen as output type.
1020
1007
 
1021
- **GeneMark HA - Version 3.25** - http://exon.gatech.edu/GeneMark/heuristic_gmhmmp.cgi
1008
+ **GeneMarkHA - Version 3.25** - http://exon.gatech.edu/GeneMark/heuristic_gmhmmp.cgi
1022
1009
  GFF was chosen as output type.
1023
1010
 
1024
1011
  **GeneMarkS - Version 4.25** - http://exon.gatech.edu/GeneMark/genemarks.cgi
1025
1012
  GFF was chosen as output type.
1026
1013
 
1027
- **GeneMarkS-2 - Version '2020'** - http://exon.gatech.edu/GeneMark/genemarks2.cgi
1014
+ **GeneMarkS2 - Version '2020'** - http://exon.gatech.edu/GeneMark/genemarks2.cgi
1028
1015
  GFF3 was chosen as output type.
1029
1016
 
1030
- **GLIMMER-3 - Version 3.02** - http://ccb.jhu.edu/software/glimmer/index.shtml
1017
+ **GLIMMER3 - Version 3.02** - http://ccb.jhu.edu/software/glimmer/index.shtml
1031
1018
  Default parameters from manual were used.
1032
1019
 
1033
1020
  **MetaGene - Version 2.24.0** - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1636498/