PyPI - ORForise - Versions diffs - 1.4.1__tar.gz → 1.4.2__tar.gz - Mend

ORForise 1.4.1.tar.gz → 1.4.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74) hide show

{ORForise-1.4.1 → orforise-1.4.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ORForise
-Version: 1.4.1
+Version: 1.4.2
 Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
 Home-page: https://github.com/NickJD/ORForise
 Author: Nicholas Dimonaco
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: numpy
 # ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
 ## Published in Bioinformatics :   https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
                              [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Annotatione-Compare Run Parameters.
+ORForise v1.4.2: Annotatione-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
                             [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Aggregate-Compare Run Parameters.
+ORForise v1.4.2: Aggregate-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
                     OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
-ORForise v1.4.1: GFF-Adder Run Parameters.
+ORForise v1.4.2: GFF-Adder Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
                           ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
-ORForise v1.4.1: GFF-Intersector Run Parameters.
+ORForise v1.4.2: GFF-Intersector Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on

{ORForise-1.4.1 → orforise-1.4.2}/README.md RENAMED Viewed

@@ -46,7 +46,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
                              [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Annotatione-Compare Run Parameters.
+ORForise v1.4.2: Annotatione-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -96,7 +96,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
                             [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Aggregate-Compare Run Parameters.
+ORForise v1.4.2: Aggregate-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -250,7 +250,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
                     OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
-ORForise v1.4.1: GFF-Adder Run Parameters.
+ORForise v1.4.2: GFF-Adder Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -312,7 +312,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
                           ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
-ORForise v1.4.1: GFF-Intersector Run Parameters.
+ORForise v1.4.2: GFF-Intersector Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on

{ORForise-1.4.1 → orforise-1.4.2}/setup.cfg RENAMED Viewed

@@ -1,6 +1,6 @@
 [metadata]
 name = ORForise
-version = 1.4.1
+version = 1.4.2
 author = Nicholas Dimonaco
 author_email = nicholas@dimonaco.co.uk
 description = ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.

{ORForise-1.4.1 → orforise-1.4.2}/src/ORForise/Annotation_Compare.py RENAMED Viewed

@@ -72,11 +72,11 @@ def comparator(options):
     rep_metric_description = list(all_rep_Metrics.keys())
     rep_metrics = list(all_rep_Metrics.values())
     ############## Printing to std-out and optional csv file
-    print('Genome Used: ' + str(options.reference_annotation.split('/')[-1]))
+    print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
     if options.reference_tool:
         print('Reference Tool Used: '+str(options.reference_tool))
     else:
-        print('Reference Used: ' + str(options.reference_annotation))
+        print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
     print('Tool Compared: '+str(options.tool))
     print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
     print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')

{ORForise-1.4.1 → orforise-1.4.2}/src/ORForise/Comparator.py RENAMED Viewed

@@ -47,13 +47,30 @@ comp = comparator()
 def is_double_range(range1, range2):
     return len(range1) >= 2 * len(range2)
-def nuc_Count(start, stop, strand):  # Gets correct seq then returns GC
-    if strand == '-':
-        r_Start = comp.genome_Size - stop
-        r_Stop = comp.genome_Size - start
-        seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
-    elif strand == '+':
-        seq = (comp.genome_Seq[start - 1:stop])
+def nuc_Count(verbose, start, stop, strand):  # Gets correct seq then returns GC
+    if stop >= comp.genome_Size:
+        if verbose == True:
+            print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
+        extra_stop = stop - comp.genome_Size
+        stop = comp.genome_Size
+        if strand == '-':
+            r_Start = comp.genome_Size - stop
+            r_Stop = comp.genome_Size - start
+            seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
+            extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
+            seq = extra_seq+seq
+        elif strand == '+':
+            seq = comp.genome_Seq[start - 1:stop]
+            extra_seq = comp.genome_Seq[:extra_stop +1]
+            seq = seq+extra_seq
+            #seq = (comp.genome_Seq[start - 1:stop])
+    else:
+        if strand == '-':
+            r_Start = comp.genome_Size - stop
+            r_Stop = comp.genome_Size - start
+            seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
+        elif strand == '+':
+            seq = (comp.genome_Seq[start - 1:stop])
     c = 0
     a = 0
     g = 0
@@ -323,8 +340,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             comp.genes_Detected.update({str(gene_details): g_pos})
             match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
             perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
-            if verbose == True:
-                print('Perfect Match')
+            #if verbose == True:
+            #    print('Perfect Match')
         elif perfect_Match == False and len(
                 overlapping_ORFs) == 1:  # If we do not have a perfect match but 1 ORF which has passed the filtering
             orf_Pos = list(overlapping_ORFs.keys())[0]
@@ -344,8 +361,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
             comp.genes_Detected.update({str(gene_details): orf_Pos})
             match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
-            if verbose == True:
-                print('Partial Match')
+            #if verbose == True:
+            #    print('Partial Match')
             partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
         elif perfect_Match == False and len(
                 overlapping_ORFs) >= 1:  # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
@@ -374,8 +391,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             genes_Unmatched(g_Start, g_Stop, g_Strand)  #
         else:
             genes_Unmatched(g_Start, g_Stop, g_Strand)  # No hit
-            if verbose == True:
-                print("No Hit")
+            #if verbose == True:
+            #    print("No Hit")
     for orf_Key in comp.matched_ORFs:  # Remove ORFs from out of frame if ORF was correctly matched to another Gene
         if orf_Key in comp.out_Of_Frame_ORFs:
             del comp.out_Of_Frame_ORFs[orf_Key]
@@ -409,7 +426,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
         comp.gene_Lengths.append(gene_Length)
         gene_Nuc_Array[g_Start - 1:g_Stop] = True  # Changing all between the two positions to 1's
-        comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
+        comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
         if gene_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.gene_Short.append(gene_Length)
         ### Calculate overlapping Genes -
@@ -453,7 +470,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         orf_Length = (o_Stop - o_Start) +1
         comp.orf_Lengths.append(orf_Length)
         orf_Nuc_Array[o_Start - 1:o_Stop] = True  # Changing all between the two positions to 1's
-        comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
+        comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
         if orf_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.orf_Short.append(orf_Length)
         ### Calculate overlapping ORFs -
@@ -487,7 +504,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         mo_Length = (mo_Stop - mo_Start)
         matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True  # This is the complete matched orf not the matched orf bits
-        comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
+        comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
         if mo_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.m_ORF_Short.append(mo_Length)
         ### Calculate overlapping Matched ORFs -

orforise-1.4.2/src/ORForise/Tools/GFF/GFF.py ADDED Viewed

@@ -0,0 +1,62 @@
+import collections
+import sys
+try:
+    from utils import revCompIterative
+    from utils import sortORFs
+except ImportError:
+    from ORForise.utils import revCompIterative
+    from ORForise.utils import sortORFs
+def GFF(*args):
+    tool_pred = args[0]
+    genome = args[1]
+    #types = args[2]
+    GFF_ORFs = collections.OrderedDict()
+    genome_size = len(genome)
+    genome_rev = revCompIterative(genome)
+    with open(tool_pred, 'r') as gff_input:
+        for line in gff_input:
+            if '#' not in line:
+                line = line.split('\t')
+                #gene_types = types.split(',') - Temporary fix
+                #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9:  # line[2] for normalrun
+                if 'CDS' in line[2] and len(line) == 9:
+                    start = int(line[3])
+                    stop = int(line[4])
+                    strand = line[6]
+                    info = line[8]
+                    if stop >= genome_size:
+                        extra_stop = stop - genome_size
+                        corrected_stop = genome_size
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - corrected_stop
+                            r_stop = genome_size - start
+                            seq = genome_rev[r_start:r_stop + 1]
+                            extra_seq = genome_rev[-extra_stop - 1:]
+                            seq = extra_seq+seq
+                            startCodon = seq[:3]
+                            stopCodon = seq[-3:]
+                        elif '+' in strand:
+                            seq = genome[start -1 :corrected_stop]
+                            extra_seq = genome[:extra_stop +1]
+                            seq = seq+extra_seq
+                            startCodon = seq[:3]
+                            stopCodon = seq[-3:]
+                    else:
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
+                    po = str(start) + ',' + str(stop)
+                    orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
+                    GFF_ORFs.update({po: orf})
+                # elif "CDS" in line[2]:
+                #     sys.exit("SAS")
+    GFF_ORFs = sortORFs(GFF_ORFs)
+    return GFF_ORFs

{ORForise-1.4.1 → orforise-1.4.2}/src/ORForise/utils.py RENAMED Viewed

@@ -4,7 +4,7 @@ import collections
 # Constants
 SHORT_ORF_LENGTH = 300
 MIN_COVERAGE = 75
-ORForise_Version = 'v1.4.1'
+ORForise_Version = 'v1.4.2'
 def revCompIterative(watson):  # Gets Reverse Complement

{ORForise-1.4.1 → orforise-1.4.2}/src/ORForise.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ORForise
-Version: 1.4.1
+Version: 1.4.2
 Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
 Home-page: https://github.com/NickJD/ORForise
 Author: Nicholas Dimonaco
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
 Requires-Python: >=3.6
 Description-Content-Type: text/markdown
 License-File: LICENSE
+Requires-Dist: numpy
 # ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
 ## Published in Bioinformatics :   https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
                              [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Annotatione-Compare Run Parameters.
+ORForise v1.4.2: Annotatione-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
                             [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.1: Aggregate-Compare Run Parameters.
+ORForise v1.4.2: Aggregate-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
                     OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
-ORForise v1.4.1: GFF-Adder Run Parameters.
+ORForise v1.4.2: GFF-Adder Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
                           ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
-ORForise v1.4.1: GFF-Intersector Run Parameters.
+ORForise v1.4.2: GFF-Intersector Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on

ORForise-1.4.1/src/ORForise/Tools/GFF/GFF.py DELETED Viewed

@@ -1,45 +0,0 @@
-import collections
-import sys
-try:
-    from utils import revCompIterative
-    from utils import sortORFs
-except ImportError:
-    from ORForise.utils import revCompIterative
-    from ORForise.utils import sortORFs
-def GFF(*args):
-    tool_pred = args[0]
-    genome = args[1]
-    #types = args[2]
-    GFF_ORFs = collections.OrderedDict()
-    genome_size = len(genome)
-    genome_rev = revCompIterative(genome)
-    with open(tool_pred, 'r') as gff_input:
-        for line in gff_input:
-            if '#' not in line:
-                line = line.split('\t')
-                #gene_types = types.split(',') - Temporary fix
-                #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9:  # line[2] for normalrun
-                if 'CDS' in line[2] and len(line) == 9:
-                    start = int(line[3])
-                    stop = int(line[4])
-                    strand = line[6]
-                    info = line[8]
-                    #name = line[8].split('Name=')[1].split(';')[0] # Issue with multiple records for each gene.
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
-                    po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
-                    GFF_ORFs.update({po: orf})
-                # elif "CDS" in line[2]:
-                #     sys.exit("SAS")
-    GFF_ORFs = sortORFs(GFF_ORFs)
-    return GFF_ORFs