ORForise 1.6.0__tar.gz → 1.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {orforise-1.6.0 → orforise-1.6.1}/PKG-INFO +216 -229
- orforise-1.6.1/README.md +397 -0
- {orforise-1.6.0 → orforise-1.6.1}/pyproject.toml +6 -4
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Aggregate_Compare.py +2 -4
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Annotation_Compare.py +4 -7
- orforise-1.6.1/src/ORForise/Annotation_Intersector.py +726 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Convert_To_GFF.py +6 -5
- orforise-1.6.1/src/ORForise/GFF_Adder.py +543 -0
- orforise-1.6.1/src/ORForise/List_Tools.py +63 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/StORForise.py +8 -4
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/EasyGene/EasyGene.py +13 -1
- orforise-1.6.0/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py → orforise-1.6.1/src/ORForise/Tools/GLIMMER3/GLIMMER3.py +2 -2
- orforise-1.6.0/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py → orforise-1.6.1/src/ORForise/Tools/GeneMarkHA/GeneMarkHA.py +1 -1
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Prodigal/Prodigal.py +13 -1
- orforise-1.6.1/src/ORForise/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/utils.py +4 -1
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/PKG-INFO +216 -229
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/SOURCES.txt +15 -13
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/entry_points.txt +4 -2
- orforise-1.6.0/README.md +0 -410
- orforise-1.6.0/src/ORForise/GFF_Adder.py +0 -268
- orforise-1.6.0/src/ORForise/GFF_Intersector.py +0 -192
- {orforise-1.6.0 → orforise-1.6.1}/LICENSE +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/setup.cfg +0 -0
- {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- {orforise-1.6.0/src/ORForise/ORForise_Analysis → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/Augustus → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/Balrog → orforise-1.6.1/src/ORForise/Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Aux}/TabToGFF/TabToGFF.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/EasyGene → orforise-1.6.1/src/ORForise/Aux/TabToGFF}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/FGENESB → orforise-1.6.1/src/ORForise/Aux}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Comparator.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/FragGeneScan → orforise-1.6.1/src/ORForise/ORForise_Analysis}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GFF → orforise-1.6.1/src/ORForise/Tools/Augustus}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GLIMMER_3 → orforise-1.6.1/src/ORForise/Tools/Balrog}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GeneMark → orforise-1.6.1/src/ORForise/Tools/EasyGene}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GeneMark_HA → orforise-1.6.1/src/ORForise/Tools/FGENESB}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GeneMark_HMM → orforise-1.6.1/src/ORForise/Tools/FragGeneScan}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GFF/GFF.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GeneMark_S → orforise-1.6.1/src/ORForise/Tools/GFF}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/GeneMark_S_2 → orforise-1.6.1/src/ORForise/Tools/GLIMMER3}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/MetaGene → orforise-1.6.1/src/ORForise/Tools/GeneMark}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/MetaGeneAnnotator → orforise-1.6.1/src/ORForise/Tools/GeneMarkHA}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/MetaGeneMark → orforise-1.6.1/src/ORForise/Tools/GeneMark_HMM}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/Prodigal → orforise-1.6.1/src/ORForise/Tools/GeneMark_S}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/Prokka → orforise-1.6.1/src/ORForise/Tools/GeneMark_S_2}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/StORF_Reporter → orforise-1.6.1/src/ORForise/Tools/MetaGene}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected/Completely_Undetected → orforise-1.6.1/src/ORForise/Tools/MetaGeneAnnotator}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected → orforise-1.6.1/src/ORForise/Tools/MetaGeneMark}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes → orforise-1.6.1/src/ORForise/Tools/Prodigal}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/TabToGFF → orforise-1.6.1/src/ORForise/Tools/Prokka}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools/TransDecoder → orforise-1.6.1/src/ORForise/Tools/StORF_Reporter}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
- {orforise-1.6.0/src/ORForise/Tools → orforise-1.6.1/src/ORForise/Tools/TransDecoder}/__init__.py +0 -0
- {orforise-1.6.0/src/ORForise → orforise-1.6.1/src/ORForise/Tools}/__init__.py +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/dependency_links.txt +0 -0
- {orforise-1.6.0 → orforise-1.6.1}/src/ORForise.egg-info/top_level.txt +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.6.
|
|
4
|
-
Summary: ORForise -
|
|
3
|
+
Version: 1.6.1
|
|
4
|
+
Summary: ORForise - A platform for analysing and comparing genome annotations.
|
|
5
5
|
Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
|
|
6
6
|
License: GNU GENERAL PUBLIC LICENSE
|
|
7
7
|
Version 3, 29 June 2007
|
|
@@ -639,61 +639,42 @@ Description-Content-Type: text/markdown
|
|
|
639
639
|
License-File: LICENSE
|
|
640
640
|
Dynamic: license-file
|
|
641
641
|
|
|
642
|
-
# ORForise -
|
|
642
|
+
# ORForise - Genome Annotation Analysis and Comparison Platform
|
|
643
643
|
## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
|
|
644
|
-
### Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
645
|
-
### Novel genome annotations can be compared to a provided reference annotation from Ensembl and predictions from other tools (or any given GFF annotation) .
|
|
646
644
|
|
|
647
645
|
# Requirements and Installation:
|
|
648
646
|
|
|
649
|
-
### The ORForise platform is written in Python (3.6-3
|
|
647
|
+
### The ORForise platform is written in Python (3.6-3.*) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
|
|
650
648
|
|
|
651
649
|
## Installation:
|
|
652
650
|
|
|
653
|
-
###
|
|
654
|
-
### Consider using '--no-cache-dir' with pip to ensure the download of the newest version of the package.
|
|
655
|
-
|
|
656
|
-
## Required Files:
|
|
657
|
-
|
|
658
|
-
To run, you need:
|
|
659
|
-
* Input Genome FASTA and corresponding GFF file (or CDS predictions with the annotated genes for the genome you want to use as reference in one of the tool output formats listed below).
|
|
660
|
-
* A prediction output from one of the compatible tools for the same genome.
|
|
661
|
-
|
|
662
|
-
### How to add your own Genome:
|
|
663
|
-
|
|
664
|
-
Corresponding FASTA and GFF files must be provided for the genome the analysis is to be performed on, including the corresponding output of any tools to compare.
|
|
665
|
-
|
|
666
|
-
### How to add your own tool:
|
|
667
|
-
|
|
668
|
-
If the new tool reports its predictions in GFF you can present ORForise with "GFF" for either the reference ```-rt``` or prediction ```-t``` option.
|
|
669
|
-
If the tool uses another non-standard format, a request can be made to add it as an option via GitHub.
|
|
670
|
-
|
|
651
|
+
### ORForise is available via the pip Python package manager ```pip3 install ORForise``` and bioconda ```conda install -c bioconda ORForise```.
|
|
671
652
|
|
|
672
653
|
### Testing:
|
|
673
654
|
Precomputed test data, which includes example input and output files for all tools presented below, is available in the `~ORForise/Testing` directory of the GitHub repository.
|
|
674
|
-
Example output files from ```Annotation-Compare```, ```
|
|
655
|
+
Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```Convert-To-GFF``` and ```Annotation-Intersector``` are available.
|
|
675
656
|
|
|
676
657
|
|
|
677
|
-
##
|
|
658
|
+
## Genome Annotation Analysis:
|
|
678
659
|
|
|
679
660
|
### Use-cases: (Running if installed via pip)
|
|
680
661
|
|
|
681
662
|
For Help: ```Annotation-Compare -h ```
|
|
682
663
|
|
|
683
664
|
```python
|
|
684
|
-
ORForise v1.6.
|
|
665
|
+
ORForise v1.6.1: Annotation-Compare Run Parameters.
|
|
685
666
|
|
|
686
667
|
Required Arguments:
|
|
687
668
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
688
669
|
-ref REFERENCE_ANNOTATION
|
|
689
670
|
Which reference annotation file to use as reference?
|
|
690
|
-
-t TOOL Which tool to analyse?
|
|
691
|
-
-tp TOOL_PREDICTION Tool genome prediction file (.gff) - Different Tool Parameters are compared individually via
|
|
692
|
-
separate files
|
|
671
|
+
-t TOOL Which tool to analyse?
|
|
672
|
+
-tp TOOL_PREDICTION Tool genome prediction file (.gff) - Different Tool Parameters are compared individually via separate files
|
|
693
673
|
|
|
694
674
|
Optional Arguments:
|
|
695
|
-
-
|
|
696
|
-
|
|
675
|
+
-gene_ident GENE_IDENT
|
|
676
|
+
What features to consider as genes? - Default: CDS - Provide comma separated list of features to consider as genes (e.g. CDS,exon)
|
|
677
|
+
-rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool name to compare output from two tools
|
|
697
678
|
|
|
698
679
|
Output:
|
|
699
680
|
-o OUTDIR Define directory where detailed output should be placed
|
|
@@ -702,18 +683,42 @@ Output:
|
|
|
702
683
|
Misc:
|
|
703
684
|
-v {True,False} Default - False: Print out runtime status
|
|
704
685
|
```
|
|
686
|
+
### Compare a *de novo* genome annotation to an Ensembl annotation:
|
|
705
687
|
|
|
706
|
-
|
|
707
|
-
|
|
708
|
-
Genome annotation is a difficult process, even for Prokaryotes. ORForise allows the direct and systematic analysis of
|
|
709
|
-
a novel CDS prediction from a wide selection of tools to a reference Genome Annotation, such as those provided by
|
|
688
|
+
Genome annotation is a difficult process, even for Prokaryotes. ORForise allows for the direct and systematic analysis of
|
|
689
|
+
*de novo* gene prediction from a wide selection of tools to a reference Genome Annotation, such as those provided by
|
|
710
690
|
Ensembl Bacteria.
|
|
711
691
|
|
|
712
|
-
#### Example: Installation through pip will allow user to call the programs directly from the ORForise package.
|
|
692
|
+
#### Example: Installation through pip allows the user to call the programs directly from the ORForise package (Prodigal and Pyrodigal provide annotations in the same format).
|
|
713
693
|
```python
|
|
714
|
-
Annotation-Compare -dna ~/
|
|
694
|
+
Annotation-Compare -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -t Prodigal -tp ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff
|
|
695
|
+
```
|
|
696
|
+
### Example Output: - See ```~/Test_Data/Genomes/E-coli/annotation_compare```
|
|
697
|
+
```commandline
|
|
698
|
+
Genome Used: Escherichia_coli.fasta
|
|
699
|
+
Reference Used: Escherichia_coli.gff
|
|
700
|
+
Tool Compared: Prodigal
|
|
701
|
+
Total Number of Reference Genes: 5222
|
|
702
|
+
Number of Contigs: 4
|
|
703
|
+
Contig Genes ORFs Perfect_Matches Partial_Matches Missed_Genes Unmatched_ORFs Multi_Matched_ORFs
|
|
704
|
+
ERS715463SCcontig000003 4068 4070 4065 1 2 4 0
|
|
705
|
+
ERS715463SCcontig000002 1033 1035 1033 0 0 2 0
|
|
706
|
+
ERS715463SCcontig000001 75 77 75 0 0 2 0
|
|
707
|
+
ERS715463SCcontig000004 46 47 45 1 0 1 0
|
|
708
|
+
|
|
709
|
+
Overall Summary:
|
|
710
|
+
Number of Genes: 5222
|
|
711
|
+
Number of ORFs: 5229
|
|
712
|
+
Perfect Matches: 5218 [5222] - 99.92%
|
|
713
|
+
Partial Matches: 2 [5222] - 0.04%
|
|
714
|
+
Missed Genes: 2 [5222] - 0.04%
|
|
715
|
+
Unmatched ORFs: 9 [5222] - 0.17%
|
|
716
|
+
Multi-matched ORFs: 0 [5222] - 0.00%
|
|
717
|
+
|
|
715
718
|
```
|
|
716
|
-
|
|
719
|
+
|
|
720
|
+
|
|
721
|
+
## Compare different novel annotations with each other on a single Genome:
|
|
717
722
|
|
|
718
723
|
If a reference Genome Annotation is not available or a direct comparison between two or more tools is wanted,
|
|
719
724
|
ORForise can be used as the example below.
|
|
@@ -725,253 +730,235 @@ ORForise can be used as the example below.
|
|
|
725
730
|
For Help: ```Aggregate-Compare -h ```
|
|
726
731
|
|
|
727
732
|
```python
|
|
728
|
-
ORForise v1.6.
|
|
733
|
+
ORForise v1.6.1: Aggregate-Compare Run Parameters.
|
|
729
734
|
|
|
730
735
|
Required Arguments:
|
|
731
736
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
732
|
-
-t TOOLS Which tools to analyse?
|
|
737
|
+
-t TOOLS Which tools to analyse?
|
|
733
738
|
-tp TOOL_PREDICTIONS Tool genome prediction file (.gff) - Provide file locations for each tool comma separated
|
|
734
739
|
-ref REFERENCE_ANNOTATION
|
|
735
740
|
Which reference annotation file to use as reference?
|
|
736
741
|
|
|
737
742
|
Optional Arguments:
|
|
738
|
-
-
|
|
739
|
-
|
|
743
|
+
-gene_ident GENE_IDENT
|
|
744
|
+
What features to consider as genes? - Default: CDS - Provide comma separated list of features to consider as genes (e.g. CDS,exon)
|
|
745
|
+
-rt REFERENCE_TOOL What type of Annotation to compare to? -- Leave blank for Ensembl reference- Provide tool name to compare output from two tools
|
|
740
746
|
|
|
741
747
|
Output:
|
|
742
|
-
-o
|
|
743
|
-
|
|
748
|
+
-o OUTDIR Define directory where detailed output should be placed - If not provided, summary will be printed to std-out
|
|
749
|
+
-n OUTNAME Define output file name - Mandatory if -o is provided: <outname>_<contig_id>_ORF_Comparison.csv
|
|
744
750
|
|
|
745
751
|
Misc:
|
|
746
752
|
-v {True,False} Default - False: Print out runtime status
|
|
753
|
+
|
|
747
754
|
```
|
|
748
755
|
|
|
749
756
|
#### Example:
|
|
750
757
|
```python
|
|
751
|
-
Aggregate-Compare -ref ~/
|
|
758
|
+
Aggregate-Compare -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -t Prodigal,GeneMarkS2 -tp ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff,~/Test_Data/Genomes/E-coli/GeneMarkS2_E-coli.gff
|
|
752
759
|
```
|
|
753
|
-
This will compare
|
|
754
|
-
Ensembl Bacteria.
|
|
760
|
+
This will compare and aggregate the predictions of Prodigal and GeneMarkS2 against the E-coli reference annotation provided by Ensembl Bacteria.
|
|
755
761
|
|
|
756
|
-
|
|
757
|
-
### Print to screen example - Prodigal prediction compared to Ensembl Bacteria reference annotation of *Escherichia coli*:
|
|
762
|
+
### Annotation Comparison Output - The output format is the same for Annotation_Compare and Aggregate_Compare: See ```~/Test_Data/Genomes/E-coli/aggregate_compare```
|
|
758
763
|
```bash
|
|
759
|
-
|
|
760
|
-
|
|
761
|
-
|
|
762
|
-
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
769
|
-
|
|
770
|
-
|
|
771
|
-
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
780
|
-
|
|
781
|
-
|
|
782
|
-
Tools Compared: Prodigal,TransDecoder,GeneMark_S_2
|
|
783
|
-
Perfect Matches:132[476]
|
|
784
|
-
Partial Matches:58[476]
|
|
785
|
-
Missed Genes:286[476]
|
|
764
|
+
Genome Used: Escherichia_coli.fasta
|
|
765
|
+
Reference Used: Escherichia_coli.gff
|
|
766
|
+
Tool Compared: Prodigal,GeneMarkS2
|
|
767
|
+
Total Number of Reference Genes: 5222
|
|
768
|
+
Number of Contigs: 4
|
|
769
|
+
Contig Genes ORFs Perfect_Matches Partial_Matches Missed_Genes Unmatched_ORFs Multi_Matched_ORFs
|
|
770
|
+
ERS715463SCcontig000003 4068 4500 4065 1 2 434 0
|
|
771
|
+
ERS715463SCcontig000002 1033 1148 1033 0 0 115 0
|
|
772
|
+
ERS715463SCcontig000001 75 92 75 0 0 17 0
|
|
773
|
+
ERS715463SCcontig000004 46 64 45 1 0 18 0
|
|
774
|
+
|
|
775
|
+
Overall Summary:
|
|
776
|
+
Number of Genes: 5222
|
|
777
|
+
Number of ORFs: 5804
|
|
778
|
+
Perfect Matches: 5218 [5222] - 99.92%
|
|
779
|
+
Partial Matches: 2 [5222] - 0.04%
|
|
780
|
+
Missed Genes: 2 [5222] - 0.04%
|
|
781
|
+
Unmatched ORFs: 584 [5222] - 11.18%
|
|
782
|
+
Multi-matched ORFs: 0 [5222] - 0.00%
|
|
783
|
+
|
|
784
|
+
Prodigal: Perfect=5218, Partial=2, Unmatched=9, Multi-matched=0
|
|
785
|
+
|
|
786
|
+
GeneMarkS2: Perfect=4609, Partial=2, Unmatched=579, Multi-matched=0
|
|
786
787
|
```
|
|
787
788
|
|
|
788
|
-
|
|
789
|
-
|
|
790
|
-
|
|
791
|
-
The output is designed to be human-readable and interpretable by the included 'ORForise_Analysis' scripts.
|
|
792
|
-
The example below presents the 12 'Representative' and 72 'All' Metrics but only shows one entry for each of the induvidual prediction reports (Perfect_Match_Genes,Partial_Match_Genes,Missed_Genes,Predicted_CDS_Without_Corresponding_Gene_in_Reference,Predicted_CDSs_Which_Detected_more_than_one_Gene).
|
|
793
|
-
|
|
794
|
-
```csv
|
|
789
|
+
Shown so far have been the summary outputs of the comparison tools.
|
|
790
|
+
Since v 1.5.0, detailed CSV outputs are also provided for each contig analysed - See ```~/Test_Data/Genomes/E-coli/annotation_compare``` for example outputs.
|
|
791
|
+
```commandline
|
|
795
792
|
Representative_Metrics:
|
|
796
793
|
Percentage_of_Genes_Detected,Percentage_of_ORFs_that_Detected_a_Gene,Percent_Difference_of_All_ORFs,Median_Length_Difference,Percentage_of_Perfect_Matches,Median_Start_Difference_of_Matched_ORFs,Median_Stop_Difference_of_Matched_ORFs,Percentage_Difference_of_Matched_Overlapping_CDSs,Percent_Difference_of_Short-Matched-ORFs,Precision,Recall,False_Discovery_Rate
|
|
797
|
-
|
|
798
|
-
|
|
794
|
+
100.00,97.87,2.17,1.20,97.83,6.0,N/A,0.00,0.00,0.98,1.00,0.02
|
|
795
|
+
Prediction_Metrics:
|
|
799
796
|
Number_of_ORFs,Percent_Difference_of_All_ORFs,Number_of_ORFs_that_Detected_a_Gene,Percentage_of_ORFs_that_Detected_a_Gene,Number_of_Genes_Detected,Percentage_of_Genes_Detected,Median_Length_of_All_ORFs,Median_Length_Difference,Minimum_Length_of_All_ORFs,Minimum_Length_Difference,Maximum_Length_of_All_ORFs,Maximum_Length_Difference,Median_GC_content_of_All_ORFs,Percent_Difference_of_All_ORFs_Median_GC,Median_GC_content_of_Matched_ORFs,Percent_Difference_of_Matched_ORF_GC,Number_of_ORFs_which_Overlap_Another_ORF,Percent_Difference_of_Overlapping_ORFs,Maximum_ORF_Overlap,Median_ORF_Overlap,Number_of_Matched_ORFs_Overlapping_Another_ORF,Percentage_Difference_of_Matched_Overlapping_CDSs,Maximum_Matched_ORF_Overlap,Median_Matched_ORF_Overlap,Number_of_Short-ORFs,Percent_Difference_of_Short-ORFs,Number_of_Short-Matched-ORFs,Percent_Difference_of_Short-Matched-ORFs,Number_of_Perfect_Matches,Percentage_of_Perfect_Matches,Number_of_Perfect_Starts,Percentage_of_Perfect_Starts,Number_of_Perfect_Stops,Percentage_of_Perfect_Stops,Number_of_Out_of_Frame_ORFs,Number_of_Matched_ORFs_Extending_a_Coding_Region,Percentage_of_Matched_ORFs_Extending_a_Coding_Region,Number_of_Matched_ORFs_Extending_Start_Region,Percentage_of_Matched_ORFs_Extending_Start_Region,Number_of_Matched_ORFs_Extending_Stop_Region,Percentage_of_Matched_ORFs_Extending_Stop_Region,Number_of_All_ORFs_on_Positive_Strand,Percentage_of_All_ORFs_on_Positive_Strand,Number_of_All_ORFs_on_Negative_Strand,Percentage_of_All_ORFs_on_Negative_Strand,Median_Start_Difference_of_Matched_ORFs,Median_Stop_Difference_of_Matched_ORFs,ATG_Start_Percentage,GTG_Start_Percentage,TTG_Start_Percentage,ATT_Start_Percentage,CTG_Start_Percentage,Other_Start_Codon_Percentage,TAG_Stop_Percentage,TAA_Stop_Percentage,TGA_Stop_Percentage,Other_Stop_Codon_Percentage,True_Positive,False_Positive,False_Negative,Precision,Recall,False_Discovery_Rate,Nucleotide_True_Positive,Nucleotide_False_Positive,Nucleotide_True_Negative,Nucleotide_False_Negative,Nuc
leotide_Precision,Nucleotide_Recall,Nucleotide_False_Discovery_Rate,ORF_Nucleotide_Coverage_of_Genome,Matched_ORF_Nucleotide_Coverage_of_Genome
|
|
800
|
-
|
|
801
|
-
|
|
802
|
-
|
|
803
|
-
|
|
804
|
-
|
|
805
|
-
|
|
806
|
-
|
|
807
|
-
Alternative_Starts_Predicted:
|
|
808
|
-
|
|
809
|
-
Alternative_Stops_Predicted:
|
|
810
|
-
|
|
811
|
-
Undetected_Gene_Metrics:
|
|
812
|
-
ATG_Start ,GTG_Start ,TTG_Start ,ATT_Start ,CTG_Start ,Alternative_Start_Codon ,TGA_Stop ,TAA_Stop ,TAG_Stop ,Alternative_Stop_Codon ,Median_Length ,ORFs_on_Positive_Strand ,ORFs_on_Negative_Strand
|
|
813
|
-
88.46,7.69,3.85,0.00,0.00,0.00,0.00,74.13,25.87,0.00,1047.50,156,130
|
|
814
|
-
Perfect_Match_Genes:
|
|
815
|
-
>Myco_686_1828_+
|
|
816
|
-
ATGAAAATATTAATTAATAAAAGTGAATTGAATAAAATTTTGAAAAAAATGAATAACGTTATTATTTCCAATAACAAAATAAAACCACATCATTCATATTTTTTAATAGAGGCAAAAGAAAAAGAAATAAACTTTTATGCTAACAATGAATACTTTTCTGTCAAATGTAATTTAAATAAAAATATTGATATTCTTGAACAAGGCTCCTTAATTGTTAAAGGAAAAATTTTTAACGATCTTATTAATGGCATAAAAGAAGAGATTATTACTATTCAAGAAAAAGATCAAACACTTTTGGTTAAAACAAAAAAAACAAGTATTAATTTAAACACAATTAATGTGAATGAATTTCCAAGAATAAGGTTTAATGAAAAAAACGATTTAAGTGAATTTAATCAATTCAAAATAAATTATTCACTTTTAGTAAAAGGCATTAAAAAAATTTTTCACTCAGTTTCAAATAATCGTGAAATATCTTCTAAATTTAATGGAGTAAATTTCAATGGATCCAATGGAAAAGAAATATTTTTAGAAGCTTCTGACACTTATAAACTATCTGTTTTTGAGATAAAGCAAGAAACAGAACCATTTGATTTCATTTTGGAGAGTAATTTACTTAGTTTCATTAATTCTTTTAATCCTGAAGAAGATAAATCTATTGTTTTTTATTACAGAAAAGATAATAAAGATAGCTTTAGTACAGAAATGTTGATTTCAATGGATAACTTTATGATTAGTTACACATCGGTTAATGAAAAATTTCCAGAGGTAAACTACTTTTTTGAATTTGAACCTGAAACTAAAATAGTTGTTCAAAAAAATGAATTAAAAGATGCACTTCAAAGAATTCAAACTTTGGCTCAAAATGAAAGAACTTTTTTATGCGATATGCAAATTAACAGTTCTGAATTAAAAATAAGAGCTATTGTTAATAATATCGGAAATTCTCTTGAGGAAATTTCTTGTCTTAAATTTGAAGGTTATAAACTTAATATTTCTTTTAACCCAAGTTCTCTATTAGATCACATAGAGTCTTTTGAATCAAATGAAATAAATTTTGATTTCCAAGGAAATAGTAAGTATTTTTTGATAACCTCTAAAAGTGAACCTGAACTTAAGCAAATATTGGTTCCTTCAAGATAA
|
|
817
|
-
|
|
818
|
-
>Myco_4812_7322_+
|
|
819
|
-
ATGGCAAAGCAACAAGATCAAGTAGATAAGATTCGTGAAAACTTAGACAATTCAACTGTCAAAAGTATTTCATTAGCAAATGAACTTGAGCGTTCATTCATGGAATATGCTATGTCAGTTATTGTTGCTCGTGCTTTACCTGATGCTAGAGATGGACTTAAACCAGTTCATCGTCGTGTTCTTTATGGTGCTTATATTGGTGGCATGCACCATGATCGTCCTTTTAAAAAGTCTGCGAGGATTGTTGGTGATGTAATGAGTAAATTCCACCCTCATGGTGATATGGCAATATATGACACCATGTCAAGAATGGCTCAAGACTTTTCATTAAGATACCTTTTAATTGATGGTCATGGTAATTTTGGTTCTATAGATGGTGATAGACCTGCTGCACAACGTTATACAGAAGCAAGATTATCTAAACTTGCAGCAGAACTTTTAAAAGATATTGATAAAGATACAGTTGACTTTATTGCTAATTATGATGGTGAGGAAAAAGAACCAACTGTTCTACCAGCAGCTTTCCCTAACTTACTTGCAAATGGTTCTAGTGGGATTGCAGTTGGAATGTCAACATCTATTCCTTCCCATAATCTCTCTGAATTAATTGCGGGTTTAATCATGTTAATTGATAATCCTCAATGCACTTTTCAAGAATTATTAACTGTAATTAAAGGACCTGATTTTCCAACAGGAGCTAACATTATCTACACAAAAGGAATTGAAAGCTACTTTGAAACAGGTAAAGGCAATGTAGTAATTCGTTCTAAAGTTGAGATAGAACAATTGCAAACAAGAAGTGCATTAGTTGTAACTGAAATTCCTTACATGGTTAACAAAACTACCTTAATTGAAAAGATTGTAGAACTTGTTAAAGCTGAAGAGATTTCAGGAATTGCTGATATCCGTGATGAATCCTCTCGAGAAGGAATAAGGTTAGTGATTGAAGTAAAACGCGACACTGTACCTGAAGTTTTATTAAATCAACTTTTTAAATCAACAAGATTACAAGTACGCTTCCCTGTTAATATGCTTGCTTTAGTTAAAGGAGCTCCTGTACTTCTCAACATGAAACAAGCTTTGGAAGTATATCTTGATCATCAAATTGATGTTCTTGTTAGAAAAACAAAGTTTGTGCTTAATAAACAACAAGAACGTTATCACATTTTAAGCGGACTTTTAATTGCTGCTTTAAATATTGATGAGGTTGTTGCAATTATTAAAAAATCAGCAAATAACCAGGAAGCAATTAATACATTAAATACAAAGTTTAAGCTTGATGAAATTCAAGCTAAAGCAGTTCTTGACATGCGTTTAAGGAGCTTAAGCGTACTTGAAGTTAACAAACTTCAAACTGAACAAAAAGAGTTAAAAGATTCAATTGAATTTTGTAAGAAAGTGTTAGCTGATCAAAAATTACAGCTAAAAATAATCAAAGAGGAATTGCAAAAAATCAATGATCAGTTTGGTGATGAAAGAAGAAGTGAAATTCTCTATGATATCTCTGAGGAAATTGATGATGAATCATTGATAAAAGTTGAGAATGTAGTGATAACTATGTCTACAAATGGTTATCTAAAAAGGATTGGAGTTGATGCTTATAATCTTCAACATCGTGGTGGAGTTGGGGTTAAAGGGCTAACTACTTATGTTGATGATAGTATTAGTCAATTATTGGTCTGTTCAACTCACTCTGACTTATTATTTTTTACTGATAAGGGTAAGGTTTATAGAATTAGAGCTCATCAAATTCCCTATGGTTTTAGAACAAATAAAGGTATTCCCGCTGTTAACTTAATCAAAATTGAAAAGGATGAAAGAATTTGTTCATTGTTATCTGTTAATAACTATGATGATGGTTATTTCTTTTTCTGTACTAAAAATGGAATTGTTAAAAGAACGAGCTTGAATGAATTCATCAACATCTTAAGTAATGGTAAGCGGGCTATATCTTTTGATGATAATGACACTTTGTATTCAGTAATTAAAACCCACGGAAA
TGATGAGATTTTTATTGGTTCTACCAATGGATTTGTTGTTCGCTTCCATGAAAATCAACTCAGAGTTCTTTCAAGAACAGCAAGAGGTGTATTTGGTATCAGTTTAAATAAAGGAGAATTTGTTAATGGACTATCAACTTCAAGCAACGGTAGCTTACTTTTATCAGTCGGTCAAAATGGAATAGGTAAATTAACGAGCATAGATAAATATAGACTCACAAAACGTAATGCTAAGGGAGTTAAAACTCTAAGGGTTACTGATAGAACAGGCCCTGTTGTTACAACAACCACTGTTTTTGGTAATGAGGATCTTTTAATGATTTCCTCTGCTGGTAAAATTGTGCGTACCAGTTTACAAGAACTTTCAGAACAAGGTAAAAACACTTCTGGTGTTAAGTTAATTAGATTAAAAGATAATGAACGTTTAGAAAGAGTAACTATCTTTAAAGAAGAGTTAGAAGACAAAGAAATGCAACTAGAAGATGTTGGATCCAAACAAATTACGCAATAA
|
|
820
|
-
.........
|
|
821
|
-
Partial_Match_Genes:
|
|
822
|
-
Gene:9923_11251_+_ATG_TAA
|
|
823
|
-
ATGAAAAGCGAAATTAATATTTTTGCACTAGCAACTGCACCTTTTAATAGTGCATTACATATTATTAGGTTTTCTGGTCCTGATGTTTATGAGATTTTAAACAAGATAACTAATAAAAAAATAACAAGAAAAGGGATGCAAATTCAACGCACATGGATAGTTGATGAAAACAATAAGCGAATTGATGATGTGCTATTATTTAAATTTGTCTCTCCAAATTCTTATACAGGAGAAGATTTAATTGAAATTTCTTGTCATGGTAACATGTTGATCGTTAATGAAATTTGCGCACTTCTTTTAAAAAAAGGAGGTGTTTATGCCAAACCTGGTGAATTTACCCAAAGGAGTTTTTTAAATGGAAAAATGAGTTTACAACAAGCTAGTGCTGTAAATAAATTGATTTTATCTCCTAACTTATTAGTTAAAGATATAGTCTTAAATAATTTAGCGGGTGAAATGGATCAACAATTAGAACAAATAGCTCAACAAGTTAATCAATTAGTAATGCAAATGGAAGTAAACATTGATTATCCAGAATATCTTGATGAACAAGTAGAACTATCAACTTTAAATAATAAAGTTAAATTGATTATTGAAAAGCTTAAAAGAATTATTGAAAATAGTAAACAACTCAAAAAACTTCACGATCCTTTTAAAATTGCCATTATAGGCGAAACTAATGTAGGTAAATCTTCTTTACTCAACGCTTTATTAAATCAAGATAAAGCGATAGTTTCAAATATTAAAGGTAGTACACGCGATGTTGTTGAAGGGGATTTCAATTTAAATGGTTATTTAATCAAGATCTTAGATACTGCAGGTATCCGTAAACATAAAAGTGGGCTTGAAAAAGCAGGAATTAAAAAAAGCTTTGAATCTATAAAGCAAGCTAATTTGGTTATTTATCTTTTAGATGCAACACATCCAAAGAAAGATCTTGAATTAATTAGTTTTTTTAAGAAAAATAAAAAGGATTTTTTTGTTTTCTATAACAAAAAAGATTTAATTACAAATAAGTTTGAAAATAGTATTTCTGCAAAGCAAAAAGATATTAAAGAATTAGTTGATTTATTAACTAAATATATTAACGAGTTTTATAAAAAAATAGATCAAAAAATCTATCTGATTGAAAATTGACAGCAAATTTTAATTGAAAAAATTAAAGAACAATTAGAACAGTTTTTAAAGCAACAAAAAAAATATTTATTTTTCGATGTTTTAGTTACCCATCTAAGAGAAGCTCAACAAGATATTCTTAAACTACTAGGTAAGGATGTAGGTTTTGATTTAGTTAATGAAATTTTTAATAATTTTTGTTTAGGAAAATAA
|
|
824
|
-
ORF:9923_11059_+_ATG_TGA
|
|
825
|
-
ATGAAAAGCGAAATTAATATTTTTGCACTAGCAACTGCACCTTTTAATAGTGCATTACATATTATTAGGTTTTCTGGTCCTGATGTTTATGAGATTTTAAACAAGATAACTAATAAAAAAATAACAAGAAAAGGGATGCAAATTCAACGCACATGGATAGTTGATGAAAACAATAAGCGAATTGATGATGTGCTATTATTTAAATTTGTCTCTCCAAATTCTTATACAGGAGAAGATTTAATTGAAATTTCTTGTCATGGTAACATGTTGATCGTTAATGAAATTTGCGCACTTCTTTTAAAAAAAGGAGGTGTTTATGCCAAACCTGGTGAATTTACCCAAAGGAGTTTTTTAAATGGAAAAATGAGTTTACAACAAGCTAGTGCTGTAAATAAATTGATTTTATCTCCTAACTTATTAGTTAAAGATATAGTCTTAAATAATTTAGCGGGTGAAATGGATCAACAATTAGAACAAATAGCTCAACAAGTTAATCAATTAGTAATGCAAATGGAAGTAAACATTGATTATCCAGAATATCTTGATGAACAAGTAGAACTATCAACTTTAAATAATAAAGTTAAATTGATTATTGAAAAGCTTAAAAGAATTATTGAAAATAGTAAACAACTCAAAAAACTTCACGATCCTTTTAAAATTGCCATTATAGGCGAAACTAATGTAGGTAAATCTTCTTTACTCAACGCTTTATTAAATCAAGATAAAGCGATAGTTTCAAATATTAAAGGTAGTACACGCGATGTTGTTGAAGGGGATTTCAATTTAAATGGTTATTTAATCAAGATCTTAGATACTGCAGGTATCCGTAAACATAAAAGTGGGCTTGAAAAAGCAGGAATTAAAAAAAGCTTTGAATCTATAAAGCAAGCTAATTTGGTTATTTATCTTTTAGATGCAACACATCCAAAGAAAGATCTTGAATTAATTAGTTTTTTTAAGAAAAATAAAAAGGATTTTTTTGTTTTCTATAACAAAAAAGATTTAATTACAAATAAGTTTGAAAATAGTATTTCTGCAAAGCAAAAAGATATTAAAGAATTAGTTGATTTATTAACTAAATATATTAACGAGTTTTATAAAAAAATAGATCAAAAAATCTATCTGATTGAAAATTGA
|
|
826
|
-
|
|
827
|
-
Gene:11251_12039_+_ATG_TAA
|
|
828
|
-
ATGGAATACTTTGATGCACATTGTCATTTAAATTGTGAACCTTTACTGAGTGAAATTGAAAAAAGCATCGCTAATTTCAAATTAATTAATTTAAAAGCAAATGTTGTAGGTACAGATTTGGATAATTCTAAAATTGCTGTTGAATTAGCTAAAAAATATCCTGATCTTTTAAAAGCAACCATAGGTATCCATCCAAATGATGTTCATTTAGTTGATTTTAAAAAGACAAAAAAACAACTTAATGAACTATTAATAAATAACAGAAATTTCATAAGTTGTATTGGTGAATATGGTTTTGATTATCACTACACAACAGAATTTATTGAATTGCAAAACAAATTCTTTGAGATGCAATTTGAAATAGCTGAAACTAATAAATTGGTTCACATGCTTCATATTCGTGATGCTCATGAAAAAATTTATGAAATATTAACAAGATTAAAGCCAACTCAACCTGTGATTTTTCATTGTTTCAGTCAAGATATAAATATTGCTAAAAAGCTACTATCATTAAAAGATTTAAATATTGACATCTTCTTTTCTATCCCAGGGATAGTTACTTTTAAGAATGCTCAAGCATTACATGAAGCTTTAAAGATTATTCCTAGTGAATTACTTTTAAGTGAAACTGACTCACCGTGATTAACCCCTTCTCCTTTTCGAGGCAAAGTTAACTGACCTGAATATGTAGTTCATACTGTTAGCACTGTTGCTGAAATAAAAAAAATAGAAATTGCTGAAATGAAGCGAATTATTGTTAAAAATGCAAAAAAATTATTTTGACATTAA
|
|
829
|
-
ORF:11251_11892_+_ATG_TGA
|
|
830
|
-
ATGGAATACTTTGATGCACATTGTCATTTAAATTGTGAACCTTTACTGAGTGAAATTGAAAAAAGCATCGCTAATTTCAAATTAATTAATTTAAAAGCAAATGTTGTAGGTACAGATTTGGATAATTCTAAAATTGCTGTTGAATTAGCTAAAAAATATCCTGATCTTTTAAAAGCAACCATAGGTATCCATCCAAATGATGTTCATTTAGTTGATTTTAAAAAGACAAAAAAACAACTTAATGAACTATTAATAAATAACAGAAATTTCATAAGTTGTATTGGTGAATATGGTTTTGATTATCACTACACAACAGAATTTATTGAATTGCAAAACAAATTCTTTGAGATGCAATTTGAAATAGCTGAAACTAATAAATTGGTTCACATGCTTCATATTCGTGATGCTCATGAAAAAATTTATGAAATATTAACAAGATTAAAGCCAACTCAACCTGTGATTTTTCATTGTTTCAGTCAAGATATAAATATTGCTAAAAAGCTACTATCATTAAAAGATTTAAATATTGACATCTTCTTTTCTATCCCAGGGATAGTTACTTTTAAGAATGCTCAAGCATTACATGAAGCTTTAAAGATTATTCCTAGTGAATTACTTTTAAGTGAAACTGACTCACCGTGA
|
|
831
|
-
.......
|
|
832
|
-
Missed_Genes:
|
|
833
|
-
>Myco_1828_2760_+
|
|
834
|
-
ATGAATCTTTACGATCTTTTAGAACTACCAACTACAGCATCAATAAAAGAAATAAAAATTGCTTATAAAAGATTAGCAAAGCGTTATCACCCTGATGTAAATAAATTAGGTTCGCAAACTTTTGTTGAAATTAATAATGCTTATTCAATATTAAGTGATCCTAACCAAAAGGAAAAATATGATTCAATGCTGAAAGTTAATGATTTTCAAAATCGCATCAAAAATTTAGATATTAGTGTTAGATGACATGAAAATTTCATGGAAGAACTCGAACTTCGTAAGAACTGAGAATTTGATTTTTTTTCATCTGATGAAGATTTCTTTTATTCTCCATTTACAAAAAACAAATATGCTTCCTTTTTAGATAAAGATGTTTCTTTAGCTTTTTTTCAGCTTTACAGCAAGGGCAAAATAGATCATCAATTGGAAAAATCTTTATTGAAAAGAAGAGATGTAAAAGAAGCTTGTCAACAGAATAAAAATTTTATTGAAGTTATAAAAGAGCAATATAACTATTTTGGTTGAATTGAAGCTAAGCGTTATTTCAATATTAATGTTGAACTTGAGCTCACACAGAGAGAGATAAGAGATAGAGATGTTGTTAACCTACCTTTAAAAATTAAAGTTATTAATAATGATTTTCCAAATCAACTCTGATATGAAATTTATAAAAACTATTCATTTCGCTTATCTTGAGATATAAAAAATGGTGAAATTGCTGAATTTTTCAATAAAGGTAATAGAGCTTTAGGATGAAAAGGTGACTTAATTGTCAGAATGAAAGTAGTTAATAAAGTAAACAAAAGACTGCGTATTTTTTCAAGCTTTTTTGAGAACGATAAATCTAAATTATGGTTCCTTGTTCCAAACGATAAACAAAGTAATCCTAATAAGGGCGTTTTTAACTATAAAACTCAGCACTTTATTGATTAA
|
|
835
|
-
|
|
836
|
-
>Myco_2845_4797_+
|
|
837
|
-
ATGGAAGAAAATAACAAAGCAAATATCTATGACTCTAGTAGCATTAAGGTCCTTGAAGGACTTGAGGCTGTTAGAAAACGCCCTGGAATGTACATTGGTTCTACTGGCGAAGAAGGTTTGCATCACATGATCTGAGAGATAGTAGACAACTCAATTGATGAAGCAATGGGAGGTTTTGCCAGTTTTGTTAAGCTTACCCTTGAAGATAATTTTGTTACCCGTGTAGAGGATGATGGAAGAGGGATACCTGTTGATATCCATCCTAAGACTAATCGTTCTACAGTTGAAACAGTTTTTACAGTTCTACACGCTGGCGGTAAATTTGATAACGATAGCTATAAAGTGTCAGGTGGTTTACACGGTGTTGGTGCATCAGTTGTTAATGCGCTTAGTTCTTCTTTTAAAGTTTGAGTTTTTCGTCAAAATAAAAAGTATTTTCTCAGCTTTAGCGATGGAGGAAAGGTAATTGGAGATTTGGTCCAAGAAGGTAACTCTGAAAAAGAGCATGGAACAATTGTTGAGTTTGTTCCTGATTTCTCTGTAATGGAAAAGAGTGATTACAAACAAACTGTAATTGTAAGCAGACTCCAGCAATTAGCTTTTTTAAACAAGGGAATAAGAATTGACTTTGTTGATAATCGTAAACAAAACCCACAGTCTTTTTCTTGAAAATATGATGGGGGATTGGTTGAATATATCCACCACCTAAACAACGAAAAAGAACCACTTTTTAATGAAGTTATTGCTGATGAAAAAACTGAAACTGTAAAAGCTGTTAATCGTGATGAAAACTACACAGTAAAGGTTGAAGTTGCTTTTCAATATAACAAAACATACAACCAATCAATTTTCAGTTTTTGTAACAACATTAATACTACAGAAGGTGGAACCCATGTGGAAGGTTTTCGTAATGCACTTGTTAAGATCATTAATCGCTTTGCTGTTGAAAATAAATTCCTAAAAGATAGTGATGAAAAGATTAACCGTGATGATGTTTGTGAAGGATTAACTGCTATTATTTCCATTAAACACCCAAACCCACAATATGAAGGACAAACTAAAAAGAAGTTAGGTAATACTGAGGTAAGACCTTTAGTTAATAGTGTTGTTAGTGAAATCTTTGAACGCTTCATGTTAGAAAACCCACAAGAAGCAAACGCTATCATCAGAAAAACACTTTTAGCTCAAGAAGCGAGAAGAAGAAGTCAAGAGGCTAGGGAGTTAACTCGTCGTAAATCACCTTTTGATAGTGGTTCATTACCAGGTAAATTAGCTGATTGTACAACCAGAGATCCTTCGATTAGTGAACTTTACATTGTTGAGGGTGATAGTGCTGGTGGCACTGCTAAAACAGGAAGAGATCGTTATTTTCAAGCTATCTTACCCTTAAGAGGAAAGATTTTAAACGTTGAAAAATCTAACTTTGAACAAATCTTTAATAATGCAGAAATTTCTGCATTAGTGATGGCAATAGGCTGTGGGATTAAACCTGATTTTGAACTTGAAAAACTTAGATATAGCAAGATTGTGATCATGACAGATGCTGATGTTGATGGTGCACACATAAGAACACTTCTCTTAACTTTCTTTTTTCGCTTTATGTATCCTTTGGTTGAACAAGGCAATATTTTTATTGCTCAACCCCCACTTTATAAAGTGTCATATTCCCATAAGGATTTATACATGCACACTGATGTTCAACTTGAACAGTGAAAAAGTCAAAACCCTAACGTAAAGTTTGGGTTACAAAGATATAAAGGACTTGGAGAAATGGATGCATTGCAGCTGTGAGAAACAACAATGGATCCTAAGGTTAGAACATTGTTAAAAGTTACTGTTGAAGATGCTTCTATTGCTGATAAAGCTTTTTCACTGTTGATGGGTGATGAAGTTCCCCCAAGAAGAGAATTTATTGAAAAAAATGCTCGTAGTGTTAAAAACATTGATATTTAA
|
|
838
|
-
|
|
839
|
-
>Myco_7294_8547_+
|
|
840
|
-
ATGTTGGATCCAAACAAATTACGCAATAACTATGATTTCTTTAAAAAGAAACTGTTAGAAAGAAATGTAAATGAGCAATTATTAAATCAGTTTATTCAAACTGATAAACTAATGCGCAAAAACTTGCAACAACTTGAACTTGCTAACCAAAAACAAAGCTTGTTGGCAAAACAAGTTGCTAAGCAAAAAGATAATAAAAAGCTATTAGCTGAATCAAAAGAACTTAAGCAGAAGATTGAAAACTTAAATAATGCTTATAAAGATTCACAAAACATTAGTCAAGATTTACTTCTAAATTTTCCTAATATTGCTCATGAATCAGTTCCTGTTGGTAAAAATGAATCAGCAAACTTAGAACTTCTTAAAGAAGGGAGAAAACCAGTTTTTGATTTCAAACCTTTACCACATCGAGAGTTATGTGAAAAGTTAAATTTAGTTGCTTTTGATAAAGCTACTAAGATTAGTGGAACTAGGTTTGTTGCATATACAGATAAAGCAGCTAAACTACTTAGAGCGATAACTAATCTAATGATTGACCTTAATAAAAGCAAGTATCAAGAATGAAACCTGCCAGTTGTTATTAATGAATTAAGTTTAAGATCAACCGGACAACTACCTAAGTTTAAAGATGATGTTTTTAAACTAGAAAACACCCGTTATTATCTTTCTCCAACTTTAGAGGTACAACTTATCAATTTACATGCTAATGAAATTTTTAATGAAGAAGATTTACCTAAATACTACACTGCAACAGGTATTAACTTTCGTCAAGAAGCGGGTAGTGCTGGTAAACAAACCAAAGGAACTATTAGATTGCATCAGTTTCAAAAAACTGAGTTAGTTAAGTTTTGTAAACCTGAAAATGCTATCAATGAATTGGAAGCAATGGTTAGAGATGCTGAACAAATCTTAAAGGCACTTAAGTTACCTTTTAGAAGGTTATTGTTATGTACTGGTGATATGGGCTTTAGTGCTGAAAAAACATATGATCTTGAAGTTTGAATGGCAGCTAGCAATGAATATCGTGAAGTTTCTTCTTGTTCATCTTGTGGTGATTTTCAAGCAAGAAGAGCTATGATTCGTTACAAAGATATTAACAACGGTAAAAACAGTTATGTTGCTACTTTAAATGGAACAGCATTATCTATTGATAGAATTTTTGCTGCAATTCTAGAAAATTTTCAAACAAAAGATGGCAAAATTCTTATCCCACAAGCATTAAAAAAATACCTTGATTTTGACACAATCAAGTAA
|
|
841
|
-
......
|
|
842
|
-
|
|
843
|
-
ORFs_Without_Corresponding_Gene_In_Reference_Metrics:
|
|
844
|
-
ATG_Start ,GTG_Start ,TTG_Start ,ATT_Start ,CTG_Start ,Alternative_Start_Codon ,TGA_Stop ,TAA_Stop ,TAG_Stop ,Alternative_Stop_Codon ,Median_Length ,ORFs_on_Positive_Strand ,ORFs_on_Negative_Strand
|
|
845
|
-
58.39,17.14,24.47,0.00,0.00,0.00,71.55,20.62,7.83,0.00,287.00,449,356
|
|
846
|
-
ORF_Without_Corresponding_Gene_in_Reference:
|
|
847
|
-
>Prodigal_1828_2073_+
|
|
848
|
-
ATGAATCTTTACGATCTTTTAGAACTACCAACTACAGCATCAATAAAAGAAATAAAAATTGCTTATAAAAGATTAGCAAAGCGTTATCACCCTGATGTAAATAAATTAGGTTCGCAAACTTTTGTTGAAATTAATAATGCTTATTCAATATTAAGTGATCCTAACCAAAAGGAAAAATATGATTCAATGCTGAAAGTTAATGATTTTCAAAATCGCATCAAAAATTTAGATATTAGTGTTAGATGA
|
|
849
|
-
>Prodigal_2605_2760_+
|
|
850
|
-
ATGAAAGTAGTTAATAAAGTAAACAAAAGACTGCGTATTTTTTCAAGCTTTTTTGAGAACGATAAATCTAAATTATGGTTCCTTGTTCCAAACGATAAACAAAGTAATCCTAATAAGGGCGTTTTTAACTATAAAACTCAGCACTTTATTGATTAA
|
|
851
|
-
>Prodigal_2845_2979_+
|
|
852
|
-
ATGGAAGAAAATAACAAAGCAAATATCTATGACTCTAGTAGCATTAAGGTCCTTGAAGGACTTGAGGCTGTTAGAAAACGCCCTGGAATGTACATTGGTTCTACTGGCGAAGAAGGTTTGCATCACATGATCTGA
|
|
853
|
-
>Prodigal_3010_3255_+
|
|
854
|
-
ATGGGAGGTTTTGCCAGTTTTGTTAAGCTTACCCTTGAAGATAATTTTGTTACCCGTGTAGAGGATGATGGAAGAGGGATACCTGTTGATATCCATCCTAAGACTAATCGTTCTACAGTTGAAACAGTTTTTACAGTTCTACACGCTGGCGGTAAATTTGATAACGATAGCTATAAAGTGTCAGGTGGTTTACACGGTGTTGGTGCATCAGTTGTTAATGCGCTTAGTTCTTCTTTTAAAGTTTGA
|
|
855
|
-
>Prodigal_3319_3513_+
|
|
856
|
-
TTGGTCCAAGAAGGTAACTCTGAAAAAGAGCATGGAACAATTGTTGAGTTTGTTCCTGATTTCTCTGTAATGGAAAAGAGTGATTACAAACAAACTGTAATTGTAAGCAGACTCCAGCAATTAGCTTTTTTAAACAAGGGAATAAGAATTGACTTTGTTGATAATCGTAAACAAAACCCACAGTCTTTTTCTTGA
|
|
857
|
-
>Prodigal_3529_4557_+
|
|
858
|
-
TTGGTTGAATATATCCACCACCTAAACAACGAAAAAGAACCACTTTTTAATGAAGTTATTGCTGATGAAAAAACTGAAACTGTAAAAGCTGTTAATCGTGATGAAAACTACACAGTAAAGGTTGAAGTTGCTTTTCAATATAACAAAACATACAACCAATCAATTTTCAGTTTTTGTAACAACATTAATACTACAGAAGGTGGAACCCATGTGGAAGGTTTTCGTAATGCACTTGTTAAGATCATTAATCGCTTTGCTGTTGAAAATAAATTCCTAAAAGATAGTGATGAAAAGATTAACCGTGATGATGTTTGTGAAGGATTAACTGCTATTATTTCCATTAAACACCCAAACCCACAATATGAAGGACAAACTAAAAAGAAGTTAGGTAATACTGAGGTAAGACCTTTAGTTAATAGTGTTGTTAGTGAAATCTTTGAACGCTTCATGTTAGAAAACCCACAAGAAGCAAACGCTATCATCAGAAAAACACTTTTAGCTCAAGAAGCGAGAAGAAGAAGTCAAGAGGCTAGGGAGTTAACTCGTCGTAAATCACCTTTTGATAGTGGTTCATTACCAGGTAAATTAGCTGATTGTACAACCAGAGATCCTTCGATTAGTGAACTTTACATTGTTGAGGGTGATAGTGCTGGTGGCACTGCTAAAACAGGAAGAGATCGTTATTTTCAAGCTATCTTACCCTTAAGAGGAAAGATTTTAAACGTTGAAAAATCTAACTTTGAACAAATCTTTAATAATGCAGAAATTTCTGCATTAGTGATGGCAATAGGCTGTGGGATTAAACCTGATTTTGAACTTGAAAAACTTAGATATAGCAAGATTGTGATCATGACAGATGCTGATGTTGATGGTGCACACATAAGAACACTTCTCTTAACTTTCTTTTTTCGCTTTATGTATCCTTTGGTTGAACAAGGCAATATTTTTATTGCTCAACCCCCACTTTATAAAGTGTCATATTCCCATAAGGATTTATACATGCACACTGATGTTCAACTTGAACAGTGA
|
|
859
|
-
....
|
|
860
|
-
ORFs_Which_Detected_more_than_one_Gene:
|
|
797
|
+
47,2.17,46,97.87,46,100.00,378.0,1.20,138,0.00,1551,0.00,54.79,-0.05,54.81,0.00,36,63.64,61,0.00,22,0.00,61,3.00,13,0.00,13,0.00,45,97.83,45,97.83,46,100.00,0,0,0.00,1,2.17,0,0.00,11,0.23,36,0.77,6.0,N/A,82.98,12.77,2.13,0.00,0.00,2.13,6.38,23.40,68.09,2.13,1.00,0.02,0.00,0.98,1.00,0.02,1.00,0.16,0.84,0.00,0.96,1.00,0.04,81.98,78.63
|
|
798
|
+
Reference_CDS_Gene_Coverage_of_Genome
|
|
799
|
+
78.61
|
|
800
|
+
Predicted_CDS_Coverage_of_Genome
|
|
801
|
+
81.98
|
|
802
|
+
Matched_Predicted_CDS_Coverage_of_Genome
|
|
803
|
+
78.63
|
|
861
804
|
|
|
862
805
|
```
|
|
863
806
|
|
|
864
807
|
|
|
865
|
-
## GFF Tools:
|
|
866
|
-
|
|
808
|
+
## GFF/Annotation Manipulation Tools: ORForise also provides tools to manipulate and combine existing annotations in GFF format or other tool-specific formats.
|
|
867
809
|
### GFF-Adder:
|
|
868
|
-
|
|
869
|
-
GFF-Adder allows for the addition of predicted CDSs to an existing reference annotation (GFF or another tool) which produces a new GFF containing the original
|
|
870
|
-
genes plus the new CDS from another prediction. Default filtering will remove additional CDSs that overlap existing genes by more than 50 nt.
|
|
871
|
-
The ```-gi``` option can be used to allow for different genomic elements to be accounted for, other than only CDSs in the reference annotation.
|
|
872
|
-
|
|
810
|
+
GFF-Adder combines two existing annotations (GFF or other tool formats).
|
|
873
811
|
For Help: ```GFF-Adder -h ```
|
|
874
812
|
|
|
875
813
|
```python
|
|
876
|
-
ORForise v1.6.
|
|
814
|
+
ORForise v1.6.1: GFF-Adder Run Parameters.
|
|
877
815
|
|
|
878
816
|
Required Arguments:
|
|
879
817
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
880
818
|
-ref REFERENCE_ANNOTATION
|
|
881
819
|
Which reference annotation file to use as reference?
|
|
882
|
-
-at ADDITIONAL_TOOL Which format to use for additional annotation?
|
|
820
|
+
-at ADDITIONAL_TOOL Which format to use for additional annotation? - Can provide multiple annotations (Tool1,Tool2)
|
|
883
821
|
-add ADDITIONAL_ANNOTATION
|
|
884
|
-
Which annotation file to add to reference annotation?
|
|
822
|
+
Which annotation file to add to reference annotation? - Can provide multiple annotations (1.GFF,2.GFF)
|
|
885
823
|
-o OUTPUT_FILE Output filename
|
|
886
824
|
|
|
887
825
|
Optional Arguments:
|
|
888
|
-
-rt REFERENCE_TOOL Which tool format to use as reference? - If not provided, will default to standard
|
|
889
|
-
|
|
890
|
-
-gi GENE_IDENT Identifier used for extraction of "genic" regions from reference annotation "CDS,rRNA,tRNA":
|
|
891
|
-
Default for is "CDS"
|
|
892
|
-
-gene_ident GENE_IDENT
|
|
826
|
+
-rt REFERENCE_TOOL Which tool format to use as reference? - If not provided, will default to the standard GFF format and will only look for "CDS" features
|
|
827
|
+
--gene_ident GENE_IDENT
|
|
893
828
|
Identifier used for identifying genomic features in reference annotation "CDS,rRNA,tRNA"
|
|
894
|
-
-
|
|
895
|
-
|
|
829
|
+
-mc Default - False: Mark reference annotations which where present in the additional tool annotation
|
|
830
|
+
-c Default - False: Do not mark 9th column with "Original/Matched/Additional tag"
|
|
831
|
+
--meta Default - False: Output metadata file
|
|
832
|
+
--olap OVERLAP Maximum overlap between reference and additional genic regions (CDS,rRNA etc) - Default: 50 nt
|
|
833
|
+
|
|
834
|
+
Misc:
|
|
835
|
+
-v {True,False} Default - False: Print out runtime status
|
|
836
|
+
|
|
837
|
+
|
|
896
838
|
```
|
|
897
839
|
|
|
898
840
|
#### Example: Running GFF-Adder to combine the additional CDS predictions made by Prodigal with the canonical annotations from Ensembl.
|
|
899
|
-
``` GFF-Adder -dna ~/
|
|
841
|
+
``` GFF-Adder -dna ~/Test_Data/Genomes/E-coli/Escherichia_coli.fasta -ref ~/Test_Data/Genomes/E-coli/Escherichia_coli.gff -at Prodigal -add ~/Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff -o ~/Test_Data/Genomes/E-coli/Ensembl_AND_Prodigal_Escherichia_coli.gff ```
|
|
900
842
|
#### Example Output: ~/ORForise/Testing/Myco_Ensembl_GFF_Adder_Prodigal.gff
|
|
901
843
|
```
|
|
902
844
|
##gff-version 3
|
|
903
845
|
# GFF-Adder
|
|
904
|
-
# Run Date:
|
|
905
|
-
##Genome DNA File
|
|
906
|
-
##Original File:
|
|
907
|
-
##Additional File:
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
Chromosome Reference_Annotation CDS 72523 73434 . + . ID=Original_Annotation
|
|
912
|
-
Chromosome Prodigal CDS 73445 73648 . + . ID=Additional_Annotation
|
|
913
|
-
Chromosome Reference_Annotation CDS 73690 77685 . + . ID=Original_Annotation
|
|
914
|
-
Chromosome Reference_Annotation CDS 77685 79085 . + . ID=Original_Annotation
|
|
915
|
-
Chromosome Reference_Annotation CDS 79089 81035 . + . ID=Original_Annotation
|
|
916
|
-
Chromosome Reference_Annotation CDS 81046 82596 . + . ID=Original_Annotation
|
|
917
|
-
Chromosome Reference_Annotation CDS 82620 84044 . + . ID=Original_Annotation
|
|
918
|
-
Chromosome Prodigal CDS 84082 84312 . + . ID=Additional_Annotation
|
|
919
|
-
Chromosome Prodigal CDS 84532 84744 . - . ID=Additional_Annotation
|
|
920
|
-
Chromosome Prodigal CDS 84776 85051 . + . ID=Additional_Annotation
|
|
846
|
+
# Run Date:2026-01-11
|
|
847
|
+
##Genome DNA File:../../Test_Data/Genomes/E-coli/Escherichia_coli.fasta
|
|
848
|
+
##Original File: ../../Test_Data/Genomes/E-coli/Escherichia_coli.gff
|
|
849
|
+
##Additional File: ../../Test_Data/Genomes/E-coli/Prodigal_Escherichia_coli.gff
|
|
850
|
+
ERS715463SCcontig000003 Prodigal CDS 2 388 . + . ID=Additional_Annotations;Prodigal
|
|
851
|
+
ERS715463SCcontig000003 MGnify CDS 83 388 . + . ID=Original_Annotation;ID=ENSB_0kRwXBh8bjHtVl3;Parent=transcript:ENSB:0kRwXBh8bjHtVl3;protein_id=ENSB:0kRwXBh8bjHtVl3
|
|
852
|
+
ERS715463SCcontig000003 MGnify CDS 453 542 . + . ID=Original_Annotation;ID=ENSB_W8Go0tx9y9dAtng;Parent=transcript:ENSB:W8Go0tx9y9dAtng;protein_id=ENSB:W8Go0tx9y9dAtng;Matched_Annotations=Prodigal
|
|
921
853
|
```
|
|
922
854
|
|
|
923
|
-
###
|
|
855
|
+
### Annotation-Intersector:
|
|
924
856
|
|
|
925
|
-
|
|
926
|
-
representing the intersection of the two existing annotations.
|
|
927
|
-
GFF-Intersector also provides an option to allow the retention of genes that have a user defined difference (minimum % coverage and in-frame).
|
|
928
|
-
The ```-gi``` option can be used to allow for different genomic elements to be accounted for, other than only CDSs in the reference annotation.
|
|
857
|
+
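Annotation-Intersector combines and contracts two existing annotations (GFF or other tool formats), keeping only the reference genes that are also found in the additional annotation at or above the chosen coverage threshold.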
|
|
929
858
|
|
|
930
|
-
For Help: ```
|
|
859
|
+
For Help: ```Annotation-Intersector -h ```
|
|
931
860
|
```python
|
|
932
|
-
|
|
861
|
+
Thank you for using ORForise
|
|
862
|
+
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
863
|
+
#####
|
|
864
|
+
usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
|
|
865
|
+
ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
866
|
+
OUTPUT_FILE [-dna GENOME_DNA]
|
|
867
|
+
[-rt REFERENCE_TOOL] [-gi GENE_IDENT]
|
|
868
|
+
[-cov COVERAGE] [--report-discordance]
|
|
869
|
+
[--report-discordance-file REPORT_DISCORDANCE_FILE]
|
|
870
|
+
|
|
871
|
+
ORForise v1.6.1: Annotation-Intersector Run Parameters
|
|
872
|
+
|
|
873
|
+
options:
|
|
874
|
+
-h, --help show this help message and exit
|
|
933
875
|
|
|
934
876
|
Required Arguments:
|
|
935
|
-
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
936
877
|
-ref REFERENCE_ANNOTATION
|
|
937
|
-
|
|
938
|
-
-at ADDITIONAL_TOOL
|
|
878
|
+
Reference annotation GFF file
|
|
879
|
+
-at ADDITIONAL_TOOL Tool name/format for additional annotation (module
|
|
880
|
+
under Tools/)
|
|
939
881
|
-add ADDITIONAL_ANNOTATION
|
|
940
|
-
|
|
941
|
-
-o OUTPUT_FILE Output filename
|
|
882
|
+
Additional annotation file to compare
|
|
883
|
+
-o OUTPUT_FILE Output GFF filename for kept genes
|
|
942
884
|
|
|
943
885
|
Optional Arguments:
|
|
944
|
-
-
|
|
945
|
-
|
|
946
|
-
-
|
|
947
|
-
|
|
948
|
-
-
|
|
949
|
-
|
|
886
|
+
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based
|
|
887
|
+
on
|
|
888
|
+
-rt REFERENCE_TOOL Reference tool parser name (if not provided, GFF is
|
|
889
|
+
expected)
|
|
890
|
+
-gi GENE_IDENT Comma-separated feature types to consider from
|
|
891
|
+
reference (default: CDS)
|
|
892
|
+
-cov COVERAGE, --coverage COVERAGE
|
|
893
|
+
Percentage coverage threshold for intersection
|
|
894
|
+
(default 100)
|
|
895
|
+
--report-discordance If set, produce discordance reports (three GFFs)
|
|
896
|
+
--report-discordance-file REPORT_DISCORDANCE_FILE
|
|
897
|
+
Optional base path for discordance reports
|
|
898
|
+
|
|
950
899
|
```
|
|
951
900
|
|
|
952
|
-
#### Example: Running
|
|
953
|
-
```
|
|
901
|
+
#### Example: Running Annotation-Intersector to combine and contract annotations from multiple tools or reference files.
|
|
902
|
+
``` Annotation-Intersector -ref .../ORForise/Tools/EasyGene/EasyGene_E-coli_E-coli.gff -rt EasyGene -at Prodigal -add .../ORForise/Tools/Prodigal/Prodigal_E-coli.gff -o .../Test_Data/Annotation-Intersector/Annotation-Intersect.gff --report-discordance ```
|
|
954
903
|
|
|
955
|
-
#### Example Output:
|
|
904
|
+
#### Example Output:
|
|
905
|
+
##### .../Test_Data/Annotation-Intersector/Annotation-Intersect.gff
|
|
906
|
+
```
|
|
907
|
+
##gff-version 3
|
|
908
|
+
# Annotation-Intersector
|
|
909
|
+
# Run Date:2026-01-09
|
|
910
|
+
##Original File: .../ORForise/Tools/EasyGene/EasyGene_E-coli_E-coli.gff
|
|
911
|
+
##Intersecting File: .../ORForise/Tools/Prodigal/Prodigal_E-coli.gff
|
|
912
|
+
Chromosome EasyGene CDS 337 2799 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
|
|
913
|
+
Chromosome EasyGene CDS 3734 5020 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
|
|
914
|
+
Chromosome EasyGene CDS 5683 6459 . - . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
|
|
915
|
+
Chromosome EasyGene CDS 6529 7959 . - . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
|
|
916
|
+
Chromosome EasyGene CDS 8238 9191 . + . ID=Original_Annotation=EasyGene;Additional_Annotation=Prodigal;Coverage=100.0
|
|
917
|
+
```
|
|
918
|
+
#### .../Test_Data/Annotation-Intersector/Annotation-Intersect.only_in_reference.gff
|
|
919
|
+
```
|
|
920
|
+
##gff-version 3
|
|
921
|
+
# Annotation-Intersector discordance report
|
|
922
|
+
# Run Date:2026-01-09
|
|
923
|
+
##Original File: EasyGene_E-coli_E-coli
|
|
924
|
+
Chromosome EasyGene CDS 408401 408484 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
|
|
925
|
+
Chromosome EasyGene CDS 1272584 1272886 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
|
|
926
|
+
Chromosome EasyGene CDS 2574901 2574960 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
|
|
927
|
+
Chromosome EasyGene CDS 2710019 2710081 . . . Status=only_in_ref;Coverage=0.00;Ref_info=EasyGene
|
|
928
|
+
```
|
|
929
|
+
#### .../Test_Data/Annotation-Intersector/Annotation-Intersect.mismatches.gff
|
|
956
930
|
```
|
|
957
931
|
##gff-version 3
|
|
958
|
-
#
|
|
959
|
-
# Run Date:
|
|
960
|
-
##
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
Chromosome
|
|
964
|
-
Chromosome
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
Chromosome original CDS 59082 59753 . + . ID=Original_Annotation;Coverage=100
|
|
971
|
-
Chromosome original CDS 61014 61406 . + . ID=Original_Annotation;Coverage=100
|
|
972
|
-
Chromosome original CDS 82620 84044 . + . ID=Original_Annotation;Coverage=100
|
|
932
|
+
# Annotation-Intersector discordance report
|
|
933
|
+
# Run Date:2026-01-09
|
|
934
|
+
##Original File: EasyGene_E-coli_E-coli
|
|
935
|
+
Chromosome EasyGene CDS 18715 19620 . . . Status=found_in_additional_but_below_coverage;Coverage=99.34;Ref_info=EasyGene;Add_info=Prodigal
|
|
936
|
+
Chromosome EasyGene CDS 19811 20314 . . . Status=found_in_additional_but_below_coverage;Coverage=75.00;Ref_info=EasyGene;Add_info=Prodigal
|
|
937
|
+
Chromosome EasyGene CDS 29624 30799 . . . Status=found_in_additional_but_below_coverage;Coverage=97.70;Ref_info=EasyGene;Add_info=Prodigal
|
|
938
|
+
Chromosome EasyGene CDS 70378 71265 . . . Status=found_in_additional_but_below_coverage;Coverage=98.99;Ref_info=EasyGene;Add_info=Prodigal
|
|
939
|
+
|
|
940
|
+
```
|
|
941
|
+
|
|
942
|
+
#### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
|
|
943
|
+
For Help: ```Convert_To_GFF.py -h ```
|
|
973
944
|
```
|
|
945
|
+
Thank you for using ORForise
|
|
946
|
+
Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
947
|
+
#####
|
|
948
|
+
usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
|
|
974
949
|
|
|
950
|
+
ORForise v1.6.1: Convert-To-GFF Run Parameters
|
|
951
|
+
|
|
952
|
+
Required Arguments:
|
|
953
|
+
-dna GENOME_DNA Genome DNA file (.fa)
|
|
954
|
+
-i INPUT_ANNOTATION Input annotation file (tabular)
|
|
955
|
+
-fmt FORMAT Input format: blast, abricate, genemark
|
|
956
|
+
-o OUTPUT_DIR Output directory
|
|
957
|
+
|
|
958
|
+
Optional Arguments:
|
|
959
|
+
-gi GENE_IDENT Gene identifier types to extract (unused)
|
|
960
|
+
--verbose Verbose logging with logfile
|
|
961
|
+
```
|
|
975
962
|
|
|
976
963
|
# Genomes Available:
|
|
977
964
|
|
|
@@ -984,17 +971,17 @@ The .fa and .gff files (from Ensembl Bacteria Release 46) below are available in
|
|
|
984
971
|
* *Pseudomonas fluorescens* - Strain UK4 - Assembly ASM73042v1
|
|
985
972
|
* *Staphylococcus aureus* - Strain 502A - Assembly ASM59796v1
|
|
986
973
|
|
|
987
|
-
# Prediction Tools Available:
|
|
988
974
|
|
|
989
|
-
There are two Groups of tools - Those which do require a pre-built model and those which do not. \
|
|
990
|
-
For the example runs provided, each tool is listed with the non-default options used and their predictions for each of the 6 model organisms are available in their respective
|
|
991
|
-
directories.
|
|
992
|
-
ORForise only needs the tool name and the annotation file produced from any available model to undertake the analysis.
|
|
993
975
|
|
|
994
|
-
|
|
976
|
+
# Prediction Tool Formats Currently Available:
|
|
977
|
+
ORForise currently supports the comparison of multiple gene prediction tools via their output in GFF3 format. \
|
|
978
|
+
This can be used to compare different annotations with each other or with additional tools which use the GFF3 format.
|
|
979
|
+
|
|
980
|
+
## Tool Specific Formats:
|
|
981
|
+
Run ```List-Tools``` to see the available tools. \
|
|
982
|
+
ORForise only needs the tool name and the annotation file produced from any compatible tool to undertake the analysis.
|
|
995
983
|
|
|
996
|
-
|
|
997
|
-
This can be used to compare different cannonical annotations with eachother or additional tools which use the GFF3 format.
|
|
984
|
+
**If the tool uses another non-standard format, a request can be made to add it as an option via GitHub.**
|
|
998
985
|
|
|
999
986
|
## Model Based Tools:
|
|
1000
987
|
|
|
@@ -1018,16 +1005,16 @@ This tool has two comparisons with the organism models *E. coli - K12 - MG165* a
|
|
|
1018
1005
|
**FragGeneScan - Version 1.3.0** - https://omics.informatics.indiana.edu/FragGeneScan/
|
|
1019
1006
|
The 'complete' genome option was selected and GFF was chosen as output type.
|
|
1020
1007
|
|
|
1021
|
-
**
|
|
1008
|
+
**GeneMarkHA - Version 3.25** - http://exon.gatech.edu/GeneMark/heuristic_gmhmmp.cgi
|
|
1022
1009
|
GFF was chosen as output type.
|
|
1023
1010
|
|
|
1024
1011
|
**GeneMarkS - Version 4.25** - http://exon.gatech.edu/GeneMark/genemarks.cgi
|
|
1025
1012
|
GFF was chosen as output type.
|
|
1026
1013
|
|
|
1027
|
-
**
|
|
1014
|
+
**GeneMarkS2 - Version '2020'** - http://exon.gatech.edu/GeneMark/genemarks2.cgi
|
|
1028
1015
|
GFF3 was chosen as output type.
|
|
1029
1016
|
|
|
1030
|
-
**
|
|
1017
|
+
**GLIMMER3 - Version 3.02** - http://ccb.jhu.edu/software/glimmer/index.shtml
|
|
1031
1018
|
Default parameters from manual were used.
|
|
1032
1019
|
|
|
1033
1020
|
**MetaGene - Version 2.24.0** - https://www.ncbi.nlm.nih.gov/pmc/articles/PMC1636498/
|