PyPI - ORForise - Versions diffs - 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl - Mend

ORForise 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10) hide show

ORForise/Convert_To_GFF.py CHANGED Viewed

@@ -4,13 +4,13 @@ from datetime import datetime
 import os
 import sys
 try:
     from .utils import *
-    from .Aux.TabToGFF.TabToGFF import TabToGFF
+    from .Tools.TabToGFF.TabToGFF import TabToGFF
 except (ImportError, ModuleNotFoundError):
     from utils import *
-    from ORForise.src.ORForise.Aux.TabToGFF import TabToGFF
+    from Tools.TabToGFF.TabToGFF import TabToGFF
@@ -50,10 +50,37 @@ def write_gff(outpath, genome_ID, genome_DNA, input_annotation, fmt, features):
             pos_ = pos.split(',')
             start = pos_[0]
             stop = pos_[-1]
-            strand = data['strand']
+            strand = data.get('strand', '.')
             if fmt == 'abricate': # Currently only supports abricate format
-                info = 'abricate_anotation;accession='+data['accession']+';database='+data['database']+';identity='+str(data['identity'])+';coverage='+str(data['coverage'])+';product='+data['product']+';resistance='+data['resistance']
-            entry = f"{data['seqid']}\t{fmt}\t{'CDS'}\t{start}\t{stop}\t.\t{strand}\t.\t{'ID='}{info}\n"
+                info = 'abricate_annotation;accession={};database={};identity={};coverage={};product={};resistance={}'.format(
+                    data.get('accession', 'unknown'),
+                    data.get('database', 'unknown'),
+                    data.get('identity', ''),
+                    data.get('coverage', ''),
+                    data.get('product', ''),
+                    data.get('resistance', '')
+                )
+            elif fmt in ('amrfinder', 'amrfinderplus', 'amr'):
+                # Build a compact attribute string for amrfinder-plus output
+                info = ('amrfinder_annotation;element={};element_name={};protein_id={};type={};class={};subclass={};method={};pct_cov={};pct_id={};closest_acc={};closest_name={}').format(
+                    data.get('element_symbol', ''),
+                    data.get('element_name', ''),
+                    data.get('protein_id', ''),
+                    data.get('type', ''),
+                    data.get('class', ''),
+                    data.get('subclass', ''),
+                    data.get('method', ''),
+                    data.get('pct_coverage', ''),
+                    data.get('pct_identity', ''),
+                    data.get('closest_accession', ''),
+                    data.get('closest_name', '')
+                )
+            else:
+                # Generic fallback: try to include any seqid/gene info if present
+                gene_id = data.get('gene') or data.get('ID') or ''
+                info = f"annotation;id={gene_id}"
+            entry = f"{data.get('seqid', genome_ID)}\t{fmt}\tCDS\t{start}\t{stop}\t.\t{strand}\t.\tID={info}\n"
             out.write(entry)
@@ -79,7 +106,7 @@ def main():
     required = parser.add_argument_group('Required Arguments')
     required.add_argument('-i', dest='input_annotation', required=True, help='Input annotation file (tabular)')
-    required.add_argument('-fmt', dest='format', required=True, help='Input format: blast, abricate, genemark')
+    required.add_argument('-fmt', dest='format', required=True, help='Input format: amrfinder, abricate, blast')
     required.add_argument('-o', dest='output_dir', required=True, help='Output directory')
     optional = parser.add_argument_group('Optional Arguments')

ORForise/{Aux → Tools}/TabToGFF/TabToGFF.py RENAMED Viewed

@@ -128,6 +128,76 @@ def parse_genemark(path, genome_seq, gene_ident=None):
     return results
+def parse_amrfinderplus(path, genome_seq, gene_ident=None):
+    """
+    Parse amrfinder-plus TSV (header line present). Produces an OrderedDict
+    keyed by "start,stop" -> attrs dict similar to parse_abricate.
+
+    Blank lines and lines starting with '#' are ignored; the first remaining
+    line is read as the tab-separated header and columns are located by name.
+    Rows with fewer fields than the header, or whose Start/Stop column is
+    missing or not an integer, are skipped with a logged warning. Any other
+    expected column that is absent from the header yields '' for that
+    attribute instead of raising. Rows sharing the same "start,stop" key
+    overwrite earlier ones.
+
+    genome_seq and gene_ident are not used by this parser.
+    """
+    # attrs key -> amrfinder-plus column name, in the order the keys are
+    # emitted after 'seqid', 'start' and 'end'.
+    text_columns = (
+        ('strand', 'Strand'),
+        ('protein_id', 'Protein id'),
+        ('element_symbol', 'Element symbol'),
+        ('element_name', 'Element name'),
+        ('type', 'Type'),
+        ('subtype', 'Subtype'),
+        ('class', 'Class'),
+        ('subclass', 'Subclass'),
+        ('method', 'Method'),
+        ('pct_coverage', '% Coverage of reference'),
+        ('pct_identity', '% Identity to reference'),
+        ('closest_accession', 'Closest reference accession'),
+        ('closest_name', 'Closest reference name'),
+    )
+    results = collections.OrderedDict()
+    header_map = None
+    n_columns = 0
+    with open(path, 'r') as fh:
+        for i, line in enumerate(fh, 1):
+            # Strip '\r' as well so CRLF files do not leak it into the last field
+            line = line.rstrip('\r\n')
+            if not line or line.startswith('#'):
+                continue
+            # The first non-empty, non-comment line is the header
+            if header_map is None:
+                header = line.split('\t')
+                n_columns = len(header)
+                header_map = {h.strip(): idx for idx, h in enumerate(header)}
+                continue
+            parts = line.split('\t')
+            # Short rows are skipped so the header indices below are always valid
+            if len(parts) < n_columns:
+                logging.warning("Line %d: unexpected number of columns in amrfinder line", i)
+                continue
+            try:
+                start = int(parts[header_map['Start']])
+                end = int(parts[header_map['Stop']])
+            except (KeyError, ValueError):
+                # KeyError: header has no Start/Stop column; ValueError: not an integer
+                logging.warning("Line %d: invalid Start/Stop in amrfinder line", i)
+                continue
+            attrs = {
+                'seqid': parts[header_map['Contig id']] if 'Contig id' in header_map else '',
+                'start': start,
+                'end': end,
+            }
+            # Column names differ between amrfinder releases; a missing column
+            # becomes '' rather than indexing the row with a non-integer.
+            attrs.update(
+                (key, parts[header_map[column]] if column in header_map else '')
+                for key, column in text_columns
+            )
+            results[f"{start},{end}"] = attrs
+    return results
 def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
     # Should be cleaned up to use consistent format names
     fmt = fmt.lower()
@@ -137,4 +207,6 @@ def TabToGFF(input_file, genome_seq, gene_ident='CDS', fmt='blast'):
         return parse_abricate(input_file, genome_seq, gene_ident)
     if fmt in ('genemark', 'gene_mark'):
         return parse_genemark(input_file, genome_seq, gene_ident)
+    if fmt in ('amrfinder', 'amrfinderplus', 'amr'):
+        return parse_amrfinderplus(input_file, genome_seq, gene_ident)
     raise ValueError(f"Unknown format: {fmt}")

ORForise/utils.py CHANGED Viewed

@@ -4,7 +4,7 @@ import collections
 # Constants
 SHORT_ORF_LENGTH = 300
 MIN_COVERAGE = 75
-ORForise_Version = 'v1.6.4'
+ORForise_Version = 'v1.6.5'
 CLOSING=("\n####\nThank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n"
         "Please Cite: https://doi.org/10.1093/bioinformatics/btab827\n"
         "#####")

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: ORForise
-Version: 1.6.4
+Version: 1.6.5
 Summary: ORForise - A platform for analysing and comparing genome annotations.
 Author-email: Nicholas Dimonaco <nicholas@dimonaco.co.uk>
 License:                     GNU GENERAL PUBLIC LICENSE
@@ -662,7 +662,7 @@ Example output files from ```Annotation-Compare```, ```Aggregate-Compare```, ```
 For Help: ```Annotation-Compare -h ```
 ```python
-ORForise v1.6.4: Annotatione-Compare Run Parameters.
+ORForise v1.6.5: Annotatione-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -730,7 +730,7 @@ ORForise can be used as the example below.
 For Help: ```Aggregate-Compare -h ```
 ```python
-ORForise v1.6.4: Aggregate-Compare Run Parameters.
+ORForise v1.6.5: Aggregate-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -811,7 +811,7 @@ GFF-Adder combines two existing annotations (GFF or other tool formats).
 For Help: ```GFF-Adder -h ```
 ```python
-ORForise v1.6.4: GFF-Adder Run Parameters.
+ORForise v1.6.5: GFF-Adder Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -868,7 +868,7 @@ usage: Annotation_Intersector.py [-h] -ref REFERENCE_ANNOTATION -at
                                  [-cov COVERAGE] [--report-discordance]
                                  [--report-discordance-file REPORT_DISCORDANCE_FILE]
-ORForise v1.6.4: Annotation-Intersector Run Parameters
+ORForise v1.6.5: Annotation-Intersector Run Parameters
 options:
   -h, --help            show this help message and exit
@@ -939,7 +939,7 @@ Chromosome	EasyGene	CDS	70378	71265	.	.	.	Status=found_in_additional_but_below_c
 ```
-#### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses.
+#### Convert-To-GFF: Converts tool-specific output files to standard GFF3 format for use in ORForise analyses (BETA!!!).
 For Help: ```Convert_To_GFF.py -h ```
 ```
 Thank you for using ORForise
@@ -947,7 +947,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 #####
 usage: Convert_To_GFF.py [-h] [-dna GENOME_DNA] -i INPUT_ANNOTATION -fmt FORMAT -o OUTPUT_DIR [-gi GENE_IDENT] [--verbose]
-ORForise v1.6.4: Convert-To-GFF Run Parameters
+ORForise v1.6.5: Convert-To-GFF Run Parameters
 Required Arguments:
   -dna GENOME_DNA      Genome DNA file (.fa)

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/RECORD RENAMED Viewed

@@ -2,12 +2,12 @@ ORForise/Aggregate_Compare.py,sha256=AzGOfuQLt4haw4rdCwIEag5Y7hnXHLLApkTa6_j99-A
 ORForise/Annotation_Compare.py,sha256=7_LwWKDKZHBrhUWODxTJgd-tppaA4k5IvNuX4bU8_2Q,18571
 ORForise/Annotation_Intersector.py,sha256=7VH7iHk4m1c08AeKf9vGEYuAecsywfC4AQHUlIbgQKQ,35856
 ORForise/Comparator.py,sha256=59VfUS8d19Xa83o1AsCuowDhhe-iNr5wO4FutDpoQRs,48078
-ORForise/Convert_To_GFF.py,sha256=zkpO3vpLxA7EpKe1X1i-_IPbcU3lbwLCsh30mmeuZkI,6030
+ORForise/Convert_To_GFF.py,sha256=N8yKhcbmtYOH3KBZFf1u2BhWOwnuVTU864RJiextnIk,7332
 ORForise/GFF_Adder.py,sha256=PuOZl4TUN9SbMjGhkuF92UDePAnx0NdVAuWFRxR61XA,28670
 ORForise/List_Tools.py,sha256=OZadIWAP0HJ_JYlTDqWw_EA8Mkew-26_cKOkRE4i7ro,1618
 ORForise/StORForise.py,sha256=yRZtKXKcmevxZ_2asesYdkl-qen3MmOn9_r0vb0927I,5772
 ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ORForise/utils.py,sha256=QdXT0XkEIjMbu4ef2HDwAKa_19m8oeu4QV8oLll5gpk,15759
+ORForise/utils.py,sha256=H7zOrQXn7PWzCaqOzRHyP4eG9Y54KnmhKmp9DCP6Ik0,15759
 ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
 ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -15,8 +15,6 @@ ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha2
 ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
-ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
-ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
 ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
@@ -63,11 +61,13 @@ ORForise/Tools/Prokka/Prokka.py,sha256=Kcl1ocVj6hPOfEEwf8bBAWhzWX_XAe55kwNUeM8EU
 ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/StORF-Reporter/StORF-Reporter.py,sha256=BQpFfpXtcNC4C_P4Bk5IZZ9__Xy2VNcbh7zzSDnrNOE,2647
 ORForise/Tools/StORF-Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ORForise/Tools/TabToGFF/TabToGFF.py,sha256=RoWOpW1gx7SIcih9MDjRArAF6AXKjRgyV30OCniTnB8,8334
+ORForise/Tools/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/TransDecoder/TransDecoder.py,sha256=l9y4OFxhSdPRBhUprs0yt2fxtSwyNCOv7oKO-aTvpDk,2381
 ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-orforise-1.6.4.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
-orforise-1.6.4.dist-info/METADATA,sha256=cJbN2ekkUs5mP8izYLMqxv8r4awotKf6DtVQNDvuPFo,59575
-orforise-1.6.4.dist-info/WHEEL,sha256=qELbo2s1Yzl39ZmrAibXA2jjPLUYfnVhUNTlyF1rq0Y,92
-orforise-1.6.4.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
-orforise-1.6.4.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
-orforise-1.6.4.dist-info/RECORD,,
+orforise-1.6.5.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
+orforise-1.6.5.dist-info/METADATA,sha256=tZyLeg5VtXRWYvu_fULSBtUyLd5WG1f-HDWEIB2qJJw,59585
+orforise-1.6.5.dist-info/WHEEL,sha256=wUyA8OaulRlbfwMtmQsvNngGrxQHAvkKcvRmdizlJi0,92
+orforise-1.6.5.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
+orforise-1.6.5.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
+orforise-1.6.5.dist-info/RECORD,,

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (80.10.1)
+Generator: setuptools (80.10.2)
 Root-Is-Purelib: true
 Tag: py3-none-any

/ORForise/{Aux → Tools}/TabToGFF/__init__.py RENAMED Viewed

File without changes

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

{orforise-1.6.4.dist-info → orforise-1.6.5.dist-info}/top_level.txt RENAMED Viewed

File without changes

ORForise 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl

ORForise 1.6.4__py3-none-any.whl → 1.6.5__py3-none-any.whl