PyPI - ORForise - Versions diffs - 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl - Mend

ORForise 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

ORForise/Annotation_Compare.py +4 -8
ORForise/Comparator.py +68 -48
ORForise/GFF_Adder.py +0 -2
ORForise/StORForise.py +10 -10
ORForise/Tools/GFF/GFF.py +30 -12
ORForise/Tools/StORF_Reporter/StORF_Reporter.py +4 -3
ORForise/utils.py +2 -13
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/METADATA +6 -6
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/RECORD +13 -13
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/WHEEL +1 -1
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/LICENSE +0 -0
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/entry_points.txt +0 -0
{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/top_level.txt +0 -0

ORForise/Annotation_Compare.py CHANGED Viewed

@@ -16,12 +16,8 @@ except ImportError:
 ##########################
 def comparator(options):
-    genome_Seq = ""
-    with open(options.genome_DNA, 'r') as genome:
-        for line in genome:
-            line = line.replace("\n", "")
-            if not line.startswith('>'):
-                genome_Seq += str(line)
+    with open(options.genome_DNA, mode='r') as genome:
+        genome_Seq = "".join(line.rstrip() for line in genome if not line.startswith('>'))
     ##############################################
     if not options.reference_tool:  # IF using Ensembl for comparison
         ref_genes = collections.OrderedDict()  # Order is important
@@ -76,11 +72,11 @@ def comparator(options):
     rep_metric_description = list(all_rep_Metrics.keys())
     rep_metrics = list(all_rep_Metrics.values())
     ############## Printing to std-out and optional csv file
-    print('Genome Used: ' + str(options.reference_annotation.split('/')[-1]))
+    print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
     if options.reference_tool:
         print('Reference Tool Used: '+str(options.reference_tool))
     else:
-        print('Reference Used: ' + str(options.reference_annotation))
+        print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
     print('Tool Compared: '+str(options.tool))
     print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
     print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')

ORForise/Comparator.py CHANGED Viewed

@@ -1,5 +1,4 @@
 import numpy as np
 try:
     from utils import *
 except ImportError:
@@ -46,14 +45,32 @@ comp = comparator()
 #     else:
 #         print ('Key not found')
-def nuc_Count(start, stop, strand):  # Gets correct seq then returns GC
-    if strand == '-':
-        r_Start = comp.genome_Size - stop
-        r_Stop = comp.genome_Size - start
-        seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
-    elif strand == '+':
-        seq = (comp.genome_Seq[start - 1:stop])
+def is_double_range(range1, range2):
+    return len(range1) >= 2 * len(range2)
+def nuc_Count(verbose, start, stop, strand):  # Gets correct seq then returns GC
+    if stop >= comp.genome_Size:
+        if verbose == True:
+            print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
+        extra_stop = stop - comp.genome_Size
+        stop = comp.genome_Size
+        if strand == '-':
+            r_Start = comp.genome_Size - stop
+            r_Stop = comp.genome_Size - start
+            seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
+            extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
+            seq = extra_seq+seq
+        elif strand == '+':
+            seq = comp.genome_Seq[start - 1:stop]
+            extra_seq = comp.genome_Seq[:extra_stop +1]
+            seq = seq+extra_seq
+            #seq = (comp.genome_Seq[start - 1:stop])
+    else:
+        if strand == '-':
+            r_Start = comp.genome_Size - stop
+            r_Stop = comp.genome_Size - start
+            seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
+        elif strand == '+':
+            seq = (comp.genome_Seq[start - 1:stop])
     c = 0
     a = 0
     g = 0
@@ -263,6 +280,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
     comp.genome_Seq = genome
     comp.genome_Seq_Rev = revCompIterative(genome)
     comp.genome_Size = len(genome)
+    better_pos_orfs_items = [[(int(pos.split(',')[0]), int(pos.split(',')[1])), orf_Details] for pos, orf_Details in orfs.items()] #TODO: turn pos into tuple instead of string everywhere
     for gene_num, gene_details in ref_genes.items():  # Loop through each gene to compare against predicted ORFs
         g_Start = int(gene_details[0])
         g_Stop = int(gene_details[1])
@@ -273,9 +293,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         overlapping_ORFs = collections.OrderedDict()
         perfect_Match = False
         out_Frame = False
-        for pos, orf_Details in orfs.items():  # Check if perfect match, if not check if match covers at least 75% of gene - Loop through ALL ORFs - SLOW
-            o_Start = int(pos.split(',')[0])
-            o_Stop = int(pos.split(',')[1])
+        for pos, orf_Details in better_pos_orfs_items:  # Check if perfect match, if not check if match covers at least 75% of gene - Loop through ALL ORFs - SLOW
+            o_Start,o_Stop = pos
             o_Strand = orf_Details[0]
             #orf_Set = set(range(o_Start, o_Stop + 1)) Removed for optimisation
             if o_Stop <= g_Start or o_Start >= g_Stop:  # Not caught up yet
@@ -283,15 +302,17 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             elif o_Start == g_Start and o_Stop == g_Stop:  # If perfect match, break and skip the rest of the ORFs
                 perfect_Match = True
                 break
+            elif is_double_range(range(o_Start, o_Stop), range(g_Start,g_Stop)):  # If ORF is double or more than the length of the gene, we do not count as found.
+                continue
             elif g_Start <= o_Start < g_Stop or g_Start < o_Stop < g_Stop:  # If ORF Start or Stop is between gene Start or Stop
                 #overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
                 overlap = max(min(o_Stop, g_Stop) - max(o_Start, g_Start), -1) + 1
                 coverage = 100 * float(overlap) / float(len(gene_Set))
                 orf_Details.append(coverage)
                 if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE:  # Only continue if ORF covers at least 75% of the gene and is in frame
-                    overlapping_ORFs.update({pos: orf_Details})
+                    overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
                 elif coverage >= MIN_COVERAGE:  # Not in frame / on same strand
-                    comp.out_Of_Frame_ORFs.update({pos: orf_Details})
+                    comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
                     out_Frame = True
             elif o_Start <= g_Start and o_Stop >= g_Stop:  # If ORF extends one or both ends of the gene
                 #overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
@@ -299,9 +320,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
                 coverage = 100 * float(overlap) / float(len(gene_Set))
                 orf_Details.append(coverage)
                 if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE:  # Only continue if ORF covers at least 75% of the gene and is in frame
-                    overlapping_ORFs.update({pos: orf_Details})
+                    overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
                 elif coverage >= MIN_COVERAGE:
-                    comp.out_Of_Frame_ORFs.update({pos: orf_Details})
+                    comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
                     out_Frame = True
             else:
                 if verbose == True:
@@ -319,8 +340,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             comp.genes_Detected.update({str(gene_details): g_pos})
             match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
             perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
-            if verbose == True:
-                print('Perfect Match')
+            #if verbose == True:
+            #    print('Perfect Match')
         elif perfect_Match == False and len(
                 overlapping_ORFs) == 1:  # If we do not have a perfect match but 1 ORF which has passed the filtering
             orf_Pos = list(overlapping_ORFs.keys())[0]
@@ -340,8 +361,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
             comp.genes_Detected.update({str(gene_details): orf_Pos})
             match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
-            if verbose == True:
-                print('Partial Match')
+            #if verbose == True:
+            #    print('Partial Match')
             partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
         elif perfect_Match == False and len(
                 overlapping_ORFs) >= 1:  # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
@@ -370,8 +391,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             genes_Unmatched(g_Start, g_Stop, g_Strand)  #
         else:
             genes_Unmatched(g_Start, g_Stop, g_Strand)  # No hit
-            if verbose == True:
-                print("No Hit")
+            #if verbose == True:
+            #    print("No Hit")
     for orf_Key in comp.matched_ORFs:  # Remove ORFs from out of frame if ORF was correctly matched to another Gene
         if orf_Key in comp.out_Of_Frame_ORFs:
             del comp.out_Of_Frame_ORFs[orf_Key]
@@ -391,9 +412,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
     atg_P, gtg_P, ttg_P, att_P, ctg_P, other_Start_P, other_Starts = start_Codon_Count(orfs)
     tag_P, taa_P, tga_P, other_Stop_P, other_Stops = stop_Codon_Count(orfs)
     # Count nucleotides found from ALL ORFs
-    gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
-    orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
-    matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
+    gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
+    orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
+    matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
     prev_Gene_Stop = 0
     prev_Gene_Overlapped = False
@@ -401,10 +422,11 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         g_Start = int(gene_details[0])
         g_Stop = int(gene_details[1])
         g_Strand = gene_details[2]
-        gene_Length = (g_Stop - g_Start)
+        gene_Length = (g_Stop - g_Start) +1
+        if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
         comp.gene_Lengths.append(gene_Length)
-        gene_Nuc_Array[g_Start - 1:g_Stop] = [1]  # Changing all between the two positions to 1's
-        comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
+        gene_Nuc_Array[g_Start - 1:g_Stop] = True  # Changing all between the two positions to 1's
+        comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
         if gene_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.gene_Short.append(gene_Length)
         ### Calculate overlapping Genes -
@@ -445,10 +467,10 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
             comp.pos_Strand += 1
         elif o_Strand == "-":
             comp.neg_Strand += 1
-        orf_Length = (o_Stop - o_Start)
+        orf_Length = (o_Stop - o_Start) +1
         comp.orf_Lengths.append(orf_Length)
-        orf_Nuc_Array[o_Start - 1:o_Stop] = [1]  # Changing all between the two positions to 1's
-        comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
+        orf_Nuc_Array[o_Start - 1:o_Stop] = True  # Changing all between the two positions to 1's
+        comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
         if orf_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.orf_Short.append(orf_Length)
         ### Calculate overlapping ORFs -
@@ -480,9 +502,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         mo_Stop = int(mo_Positions.split(',')[1])
         mo_Strand = m_ORF_Details[0]
         mo_Length = (mo_Stop - mo_Start)
-        matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = [1]  # This is the complete matched orf not the matched orf bits
+        matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True  # This is the complete matched orf not the matched orf bits
-        comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
+        comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
         if mo_Length <= SHORT_ORF_LENGTH:  # .utils
             comp.m_ORF_Short.append(mo_Length)
         ### Calculate overlapping Matched ORFs -
@@ -506,30 +528,28 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
         elif '-' in mo_Strand:
             comp.m_ORF_Neg_Olap.append(0)
     ####
-    gene_Coverage_Genome = format(100 * np.count_nonzero(gene_Nuc_Array) / comp.genome_Size, '.2f')
-    orf_Coverage_Genome = format(100 * np.count_nonzero(orf_Nuc_Array) / comp.genome_Size, '.2f')
-    matched_ORF_Coverage_Genome = format(100 * np.count_nonzero(matched_ORF_Nuc_Array) / comp.genome_Size,
+    gene_Coverage_Genome = format(100 * np.sum(gene_Nuc_Array) / comp.genome_Size, '.2f')
+    orf_Coverage_Genome = format(100 * np.sum(orf_Nuc_Array) / comp.genome_Size, '.2f')
+    matched_ORF_Coverage_Genome = format(100 * np.sum(matched_ORF_Nuc_Array) / comp.genome_Size,
                                          '.2f')  # This gets the nts which are in matched ORFs - Check below
     # matched_ORF_Nuc_AND_Gene = np.logical_and(matched_ORF_Nuc_Array,gene_Nuc_Array) + [0 for i in range(len(gene_Nuc_Array))] # This gets the nts which are in both matched ORFs and detected genes
     # matched_ORF_Coverage_Genome = format(100 * np.count_nonzero(matched_ORF_Nuc_AND_Gene) / comp.genome_Size,'.2f')
     # gene and orf nucleotide Intersection
-    gene_ORF_Nuc_Intersection = np.count_nonzero(gene_Nuc_Array & orf_Nuc_Array)
+    gene_ORF_Nuc_Intersection = np.sum(gene_Nuc_Array & orf_Nuc_Array)
     # not gene but orf nucleotides
-    not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array) + [0 for i in range(
-        len(gene_Nuc_Array))]  # End part to keep array as 1,0 not T,F
-    not_Gene_Nuc_And_ORF_Count = np.count_nonzero(not_Gene_Nuc_Array & orf_Nuc_Array)
+    not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array)
+    not_Gene_Nuc_And_ORF_Count = np.sum(not_Gene_Nuc_Array & orf_Nuc_Array)
     # not orf nucleotides but gene
-    not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array) + [0 for i in range(
-        len(orf_Nuc_Array))]  # End part to keep array as 1,0 not T,F
-    not_ORF_Nuc_And_Gene_Count = np.count_nonzero(not_ORF_Nuc_Array & gene_Nuc_Array)
+    not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array)
+    not_ORF_Nuc_And_Gene_Count = np.sum(not_ORF_Nuc_Array & gene_Nuc_Array)
     # not gene or orf nucleotides
-    not_Gene_Nuc_Not_ORF_Nuc_Count = np.count_nonzero(not_Gene_Nuc_Array & not_ORF_Nuc_Array)
+    not_Gene_Nuc_Not_ORF_Nuc_Count = np.sum(not_Gene_Nuc_Array & not_ORF_Nuc_Array)
     # Nucleotide 'accuracy' - Normalised by number of nucelotides annotated by a gene
-    NT_TP = format(gene_ORF_Nuc_Intersection / np.count_nonzero(gene_Nuc_Array), '.2f')
-    NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.count_nonzero(not_Gene_Nuc_Array), '.2f')
-    NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.count_nonzero(gene_Nuc_Array), '.2f')
-    NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.count_nonzero(not_Gene_Nuc_Array), '.2f')
+    NT_TP = format(gene_ORF_Nuc_Intersection / np.sum(gene_Nuc_Array), '.2f')
+    NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.sum(not_Gene_Nuc_Array), '.2f')
+    NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.sum(gene_Nuc_Array), '.2f')
+    NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.sum(not_Gene_Nuc_Array), '.2f')
     NT_Precision = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_Gene_Nuc_And_ORF_Count), '.2f')
     NT_Recall = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_ORF_Nuc_And_Gene_Count), '.2f')
     NT_False_Discovery_Rate = format(

ORForise/GFF_Adder.py CHANGED Viewed

@@ -30,8 +30,6 @@ def gff_writer(options,genome_ID, genome_DNA, reference_annotation, reference_to
     for pos, data in combined_ORFs.items():
         pos_ = pos.split(',')
-        if '15040' in pos:
-            print(2)
         start = pos_[0]
         stop = pos_[-1]
         strand = data[0]

ORForise/StORForise.py CHANGED Viewed

@@ -11,7 +11,7 @@ from Comparator import tool_comparison
 def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare):
     genome_Seq = ""
-    with open('Genomes/' + genome_to_compare + '.fa', 'r') as genome:
+    with open(genome_to_compare, 'r') as genome:
         for line in genome:
             line = line.replace("\n", "")
             if ">" not in line:
@@ -19,23 +19,23 @@ def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare
     ##############################################
     genes = collections.OrderedDict()
     count = 0
-    with open('Tools/StORF_Undetected/' + input_to_analyse, 'r') as genome_gff:  # Get list of missed genes
+    with open(input_to_analyse, 'r') as genome_gff:  # Get list of missed genes
         for line in genome_gff:
             if ">" in line:
                 line = line.strip()
-                Start = int(line.split('_')[1])
-                Stop = int(line.split('_')[2])
-                Strand = line.split('_')[3]
-                Gene = str(Start) + ',' + str(Stop) + ',' + Strand
-                genes.update({count: Gene})
+                start = int(line.split('_')[1])
+                stop = int(line.split('_')[2])
+                strand = line.split('_')[3]
+                gene_details = [start,stop,strand]
+                genes.update({count: gene_details})
                 count += 1
     ##################################
     tool_predictions = import_module('Tools.' + tool + '.' + tool)
     tool_predictions = getattr(tool_predictions, tool)
     orfs = tool_predictions(storfs_to_find_missing, genome_Seq)
-    all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
-        genes, orfs, genome_Seq)
-    outname = tool + '_' + genome_to_compare
+    all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, perfect_Matches, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, orf_Coverage_Genome, matched_ORF_Coverage_Genome, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
+        genes, orfs, genome_Seq,True)
+    outname = tool + '_' + genome_to_compare.split('/')[-1].split('.')[0]
     metric_description = list(all_Metrics.keys())
     metrics = list(all_Metrics.values())
     rep_metric_description = list(all_rep_Metrics.keys())

ORForise/Tools/GFF/GFF.py CHANGED Viewed

@@ -11,7 +11,7 @@ except ImportError:
 def GFF(*args):
     tool_pred = args[0]
     genome = args[1]
-    types = args[2]
+    #types = args[2]
     GFF_ORFs = collections.OrderedDict()
     genome_size = len(genome)
     genome_rev = revCompIterative(genome)
@@ -19,21 +19,39 @@ def GFF(*args):
         for line in gff_input:
             if '#' not in line:
                 line = line.split('\t')
-                gene_types = types.split(',')
-                if any(gene_type == line[2] for gene_type in gene_types)and len(line) == 9:  # line[2] for normalrun
+                #gene_types = types.split(',') - Temporary fix
+                #if any(gene_type == line[2] for gene_type in gene_types) and len(line) == 9:  # line[2] for normalrun
+                if 'CDS' in line[2] and len(line) == 9:
                     start = int(line[3])
                     stop = int(line[4])
                     strand = line[6]
                     info = line[8]
-                    #name = line[8].split('Name=')[1].split(';')[0] # Issue with multiple records for each gene.
-                    if '-' in strand:  # Reverse Compliment starts and stops adjusted
-                        r_start = genome_size - stop
-                        r_stop = genome_size - start
-                        startCodon = genome_rev[r_start:r_start + 3]
-                        stopCodon = genome_rev[r_stop - 2:r_stop + 1]
-                    elif '+' in strand:
-                        startCodon = genome[start - 1:start + 2]
-                        stopCodon = genome[stop - 3:stop]
+                    if stop >= genome_size:
+                        extra_stop = stop - genome_size
+                        corrected_stop = genome_size
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - corrected_stop
+                            r_stop = genome_size - start
+                            seq = genome_rev[r_start:r_stop + 1]
+                            extra_seq = genome_rev[-extra_stop - 1:]
+                            seq = extra_seq+seq
+                            startCodon = seq[:3]
+                            stopCodon = seq[-3:]
+                        elif '+' in strand:
+                            seq = genome[start -1 :corrected_stop]
+                            extra_seq = genome[:extra_stop +1]
+                            seq = seq+extra_seq
+                            startCodon = seq[:3]
+                            stopCodon = seq[-3:]
+                    else:
+                        if '-' in strand:  # Reverse Compliment starts and stops adjusted
+                            r_start = genome_size - stop
+                            r_stop = genome_size - start
+                            startCodon = genome_rev[r_start:r_start + 3]
+                            stopCodon = genome_rev[r_stop - 2:r_stop + 1]
+                        elif '+' in strand:
+                            startCodon = genome[start - 1:start + 2]
+                            stopCodon = genome[stop - 3:stop]
                     po = str(start) + ',' + str(stop)
                     orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
                     GFF_ORFs.update({po: orf})

ORForise/Tools/StORF_Reporter/StORF_Reporter.py CHANGED Viewed

@@ -14,12 +14,13 @@ def StORF_Reporter(tool_pred, genome):
     genome_rev = revCompIterative(genome)
     with open(tool_pred, 'r') as storf_input:
         for line in storf_input:
-            if '#' not in line:
+            if not line.startswith('#') and not line.startswith('\n'):
                 line = line.split()
-                if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1]: # need to harmonise this.
+                if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1]  or 'StORF' in line[1] or 'StORF-Reporter' in line[1]: # need to harmonise this.
                     start = int(line[3])
                     stop = int(line[4])
                     strand = line[6]
+                    info = line[8]
                     if '-' in strand:  # Reverse Compliment starts and stops adjusted
                         r_start = genome_size - stop
                         r_stop = genome_size - start
@@ -29,7 +30,7 @@ def StORF_Reporter(tool_pred, genome):
                         startCodon = genome[start:start + 3]
                         stopCodon = genome[stop - 3:stop]
                     po = str(start) + ',' + str(stop)
-                    orf = [strand, startCodon, stopCodon, line[2]] # StORF/Con-StORF or CDS??
+                    orf = [strand, startCodon, stopCodon, 'CDS', info] # StORF/Con-StORF or CDS??
                     storf_orfs.update({po: orf})
     storf_orfs = sortORFs(storf_orfs)

ORForise/utils.py CHANGED Viewed

@@ -4,22 +4,11 @@ import collections
 # Constants
 SHORT_ORF_LENGTH = 300
 MIN_COVERAGE = 75
-ORForise_Version = 'v1.4.0'
+ORForise_Version = 'v1.4.2'
 def revCompIterative(watson):  # Gets Reverse Complement
-    complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N',
-                   'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M',
-                   'M': 'K', 'V': 'B', 'B': 'V', 'H': 'D', 'D': 'H'}
-    watson = watson.upper()
-    watsonrev = watson[::-1]
-    crick = ""
-    for nt in watsonrev:
-        try:
-            crick += complements[nt]
-        except KeyError:
-            crick += nt  # Do not modify non-standard DNA
-    return crick
+    return watson.upper()[::-1].translate(str.maketrans("ATCGRYKMVBHD","TAGCYRMKBVDH"))
 def sortORFs(tool_ORFs):  # Will only sort by given start position

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: ORForise
-Version: 1.4.0
+Version: 1.4.2
 Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
 Home-page: https://github.com/NickJD/ORForise
 Author: Nicholas Dimonaco
@@ -21,7 +21,7 @@ Requires-Dist: numpy
 # Requirements and Installation:
-### The ORForise platform is written in Python3.8 and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
+### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
 ## Intallation:
@@ -62,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
                              [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.0: Annotatione-Compare Run Parameters.
+ORForise v1.4.2: Annotatione-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -112,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
                             [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
-ORForise v1.4.0: Aggregate-Compare Run Parameters.
+ORForise v1.4.2: Aggregate-Compare Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -266,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
                     OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
-ORForise v1.4.0: GFF-Adder Run Parameters.
+ORForise v1.4.2: GFF-Adder Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on
@@ -328,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
 usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
                           ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
-ORForise v1.4.0: GFF-Intersector Run Parameters.
+ORForise v1.4.2: GFF-Intersector Run Parameters.
 Required Arguments:
   -dna GENOME_DNA       Genome DNA file (.fa) which both annotations are based on

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/RECORD RENAMED Viewed

@@ -1,11 +1,11 @@
 ORForise/Aggregate_Compare.py,sha256=cY0PdA_SnywPcqwPomXmEHaZ6OUDS9k_QeLtXnewjiA,10648
-ORForise/Annotation_Compare.py,sha256=boAW-kWgY9lUobkj5nhK1JKLowtxXNuMg_2oCiKEBqU,10298
-ORForise/Comparator.py,sha256=7AYy5UxxTMpK51pR49U8MoX-j7XlAhUP_Sp7HSMoVSY,43921
-ORForise/GFF_Adder.py,sha256=IdEDlRBl2gv2ie7cyQ_Kq51GUipRleUSPjMfLO6OBQE,14105
+ORForise/Annotation_Compare.py,sha256=6y_RiJg0q9g4Bcwy8Lxi5gSDkMLwm6uYJG2evxnKAhU,10228
+ORForise/Comparator.py,sha256=AEpZQ8IURgYrWLKRRQEBUp3nFWKsxTb0f3O6XdHfRAc,45041
+ORForise/GFF_Adder.py,sha256=-BlF6DQWcbhyYT88M0ZkoaWA2YDDxsby-7jksfeJN1Q,14057
 ORForise/GFF_Intersector.py,sha256=EcDKyJr_47066kma2CguMf3uwzB2tYomPDFjmoX8IoU,9900
-ORForise/StORForise.py,sha256=BjZ0Qr4_8uOWtKZ3llZlitYevW4eX0TIjCgtBUpl4gQ,5387
+ORForise/StORForise.py,sha256=2QU6q3wPK6iqtyKg2jEVwFTB4bSymyc-mSpk7T8yNaY,5431
 ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ORForise/utils.py,sha256=pa4qb2uMxS9zm4wh3vQgWmy63k5GsZbaPPSTzotcFdw,1465
+ORForise/utils.py,sha256=BeYOERE3UfBXpazmLDOQDzXj-bGbXd9oooWyPC1Ts1s,1099
 ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
 ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
@@ -26,7 +26,7 @@ ORForise/Tools/FGENESB/FGENESB.py,sha256=TCvsGzfZ41tKkgF6TaBFpsuZBrueSygmoBco7d6
 ORForise/Tools/FGENESB/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/FragGeneScan/FragGeneScan.py,sha256=l3lqIxRUEx7lIV8Odhm6NsTgfHTrriYXcFoA4WW-E-E,1376
 ORForise/Tools/FragGeneScan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ORForise/Tools/GFF/GFF.py,sha256=IGePhSnfxXs6_31UsbyZ3buiAZOaTG70Js6g0Scnaqo,1818
+ORForise/Tools/GFF/GFF.py,sha256=RF-PtryGTV0Lgz6sT7L5idVEwCF_MP0prIcfaUYCoAQ,2806
 ORForise/Tools/GFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/GLIMMER_3/GLIMMER_3.py,sha256=9WQNSdlhQOpHQ4zcxncrTb2Lt6tiUB8Y0FBoyGxG_Yc,1723
 ORForise/Tools/GLIMMER_3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -50,7 +50,7 @@ ORForise/Tools/Prodigal/Prodigal.py,sha256=8-MJrEbhSL4sbNjI1JEUZ1jm5PRz9OUBdlyD8
 ORForise/Tools/Prodigal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/Prokka/Prokka.py,sha256=akq0lu2TbOqLt-GI27a0Zbh8yfJIVAHBi07FtCfCAcY,1537
 ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ORForise/Tools/StORF_Reporter/StORF_Reporter.py,sha256=QBYdDTO166iQlUdfogIJQ9bA_20qt8QLXPGw6d-Jan4,1477
+ORForise/Tools/StORF_Reporter/StORF_Reporter.py,sha256=mljwJO1iNy1HxcuqHAqH5ODDuLomw9HcRwOEJDScNQc,1609
 ORForise/Tools/StORF_Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 ORForise/Tools/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
 ORForise/Tools/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -60,9 +60,9 @@ ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa
 ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
 ORForise/Tools/TransDecoder/TransDecoder.py,sha256=utnL52il6BGbbBxoizYPnY1qwBGeslYDCa5xU9RGWPg,1384
 ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-ORForise-1.4.0.dist-info/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
-ORForise-1.4.0.dist-info/METADATA,sha256=382JS9al3W80Jn82YH3gxxdOS1piBSzmihVy8TxsLfw,36450
-ORForise-1.4.0.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
-ORForise-1.4.0.dist-info/entry_points.txt,sha256=ss2cbLmljRmLIeZ3t48p_06NuQuRiKeA11IOUYg_uiY,246
-ORForise-1.4.0.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
-ORForise-1.4.0.dist-info/RECORD,,
+ORForise-1.4.2.dist-info/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
+ORForise-1.4.2.dist-info/METADATA,sha256=kv8pem6rn0yrjNtc9Gkm-RZvWsafVx866aCjUIdti5c,36457
+ORForise-1.4.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
+ORForise-1.4.2.dist-info/entry_points.txt,sha256=ss2cbLmljRmLIeZ3t48p_06NuQuRiKeA11IOUYg_uiY,246
+ORForise-1.4.2.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
+ORForise-1.4.2.dist-info/RECORD,,

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: bdist_wheel (0.40.0)
+Generator: bdist_wheel (0.43.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{ORForise-1.4.0.dist-info → ORForise-1.4.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

ORForise 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl

ORForise 1.4.0__py3-none-any.whl → 1.4.2__py3-none-any.whl