ORForise 1.4.1__py3-none-any.whl → 1.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Annotation_Compare.py +2 -2
- ORForise/Comparator.py +33 -16
- ORForise/Tools/GFF/GFF.py +26 -9
- ORForise/utils.py +1 -1
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/METADATA +5 -5
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/RECORD +10 -10
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/WHEEL +1 -1
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/LICENSE +0 -0
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/entry_points.txt +0 -0
- {ORForise-1.4.1.dist-info → ORForise-1.4.2.dist-info}/top_level.txt +0 -0
ORForise/Annotation_Compare.py
CHANGED
|
@@ -72,11 +72,11 @@ def comparator(options):
|
|
|
72
72
|
rep_metric_description = list(all_rep_Metrics.keys())
|
|
73
73
|
rep_metrics = list(all_rep_Metrics.values())
|
|
74
74
|
############## Printing to std-out and optional csv file
|
|
75
|
-
print('Genome Used: ' + str(options.
|
|
75
|
+
print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
|
|
76
76
|
if options.reference_tool:
|
|
77
77
|
print('Reference Tool Used: '+str(options.reference_tool))
|
|
78
78
|
else:
|
|
79
|
-
print('Reference Used: ' + str(options.reference_annotation))
|
|
79
|
+
print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
|
|
80
80
|
print('Tool Compared: '+str(options.tool))
|
|
81
81
|
print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
|
|
82
82
|
print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')
|
ORForise/Comparator.py
CHANGED
|
@@ -47,13 +47,30 @@ comp = comparator()
|
|
|
47
47
|
|
|
48
48
|
def is_double_range(range1, range2):
|
|
49
49
|
return len(range1) >= 2 * len(range2)
|
|
50
|
-
def nuc_Count(start, stop, strand): # Gets correct seq then returns GC
|
|
51
|
-
if
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
50
|
+
def nuc_Count(verbose, start, stop, strand): # Gets correct seq then returns GC
|
|
51
|
+
if stop >= comp.genome_Size:
|
|
52
|
+
if verbose == True:
|
|
53
|
+
print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
|
|
54
|
+
extra_stop = stop - comp.genome_Size
|
|
55
|
+
stop = comp.genome_Size
|
|
56
|
+
if strand == '-':
|
|
57
|
+
r_Start = comp.genome_Size - stop
|
|
58
|
+
r_Stop = comp.genome_Size - start
|
|
59
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
60
|
+
extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
|
|
61
|
+
seq = extra_seq+seq
|
|
62
|
+
elif strand == '+':
|
|
63
|
+
seq = comp.genome_Seq[start - 1:stop]
|
|
64
|
+
extra_seq = comp.genome_Seq[:extra_stop +1]
|
|
65
|
+
seq = seq+extra_seq
|
|
66
|
+
#seq = (comp.genome_Seq[start - 1:stop])
|
|
67
|
+
else:
|
|
68
|
+
if strand == '-':
|
|
69
|
+
r_Start = comp.genome_Size - stop
|
|
70
|
+
r_Stop = comp.genome_Size - start
|
|
71
|
+
seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
|
|
72
|
+
elif strand == '+':
|
|
73
|
+
seq = (comp.genome_Seq[start - 1:stop])
|
|
57
74
|
c = 0
|
|
58
75
|
a = 0
|
|
59
76
|
g = 0
|
|
@@ -323,8 +340,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
323
340
|
comp.genes_Detected.update({str(gene_details): g_pos})
|
|
324
341
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
325
342
|
perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
|
|
326
|
-
if verbose == True:
|
|
327
|
-
|
|
343
|
+
#if verbose == True:
|
|
344
|
+
# print('Perfect Match')
|
|
328
345
|
elif perfect_Match == False and len(
|
|
329
346
|
overlapping_ORFs) == 1: # If we do not have a perfect match but 1 ORF which has passed the filtering
|
|
330
347
|
orf_Pos = list(overlapping_ORFs.keys())[0]
|
|
@@ -344,8 +361,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
344
361
|
comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
|
|
345
362
|
comp.genes_Detected.update({str(gene_details): orf_Pos})
|
|
346
363
|
match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
|
|
347
|
-
if verbose == True:
|
|
348
|
-
|
|
364
|
+
#if verbose == True:
|
|
365
|
+
# print('Partial Match')
|
|
349
366
|
partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
|
|
350
367
|
elif perfect_Match == False and len(
|
|
351
368
|
overlapping_ORFs) >= 1: # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
|
|
@@ -374,8 +391,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
374
391
|
genes_Unmatched(g_Start, g_Stop, g_Strand) #
|
|
375
392
|
else:
|
|
376
393
|
genes_Unmatched(g_Start, g_Stop, g_Strand) # No hit
|
|
377
|
-
if verbose == True:
|
|
378
|
-
|
|
394
|
+
#if verbose == True:
|
|
395
|
+
# print("No Hit")
|
|
379
396
|
for orf_Key in comp.matched_ORFs: # Remove ORFs from out of frame if ORF was correctly matched to another Gene
|
|
380
397
|
if orf_Key in comp.out_Of_Frame_ORFs:
|
|
381
398
|
del comp.out_Of_Frame_ORFs[orf_Key]
|
|
@@ -409,7 +426,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
409
426
|
if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
|
|
410
427
|
comp.gene_Lengths.append(gene_Length)
|
|
411
428
|
gene_Nuc_Array[g_Start - 1:g_Stop] = True # Changing all between the two positions to 1's
|
|
412
|
-
comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
|
|
429
|
+
comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
|
|
413
430
|
if gene_Length <= SHORT_ORF_LENGTH: # .utils
|
|
414
431
|
comp.gene_Short.append(gene_Length)
|
|
415
432
|
### Calculate overlapping Genes -
|
|
@@ -453,7 +470,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
453
470
|
orf_Length = (o_Stop - o_Start) +1
|
|
454
471
|
comp.orf_Lengths.append(orf_Length)
|
|
455
472
|
orf_Nuc_Array[o_Start - 1:o_Stop] = True # Changing all between the two positions to 1's
|
|
456
|
-
comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
|
|
473
|
+
comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
|
|
457
474
|
if orf_Length <= SHORT_ORF_LENGTH: # .utils
|
|
458
475
|
comp.orf_Short.append(orf_Length)
|
|
459
476
|
### Calculate overlapping ORFs -
|
|
@@ -487,7 +504,7 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
|
|
|
487
504
|
mo_Length = (mo_Stop - mo_Start)
|
|
488
505
|
matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True # This is the complete matched orf not the matched orf bits
|
|
489
506
|
|
|
490
|
-
comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
|
|
507
|
+
comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
|
|
491
508
|
if mo_Length <= SHORT_ORF_LENGTH: # .utils
|
|
492
509
|
comp.m_ORF_Short.append(mo_Length)
|
|
493
510
|
### Calculate overlapping Matched ORFs -
|
ORForise/Tools/GFF/GFF.py
CHANGED
|
@@ -26,15 +26,32 @@ def GFF(*args):
|
|
|
26
26
|
stop = int(line[4])
|
|
27
27
|
strand = line[6]
|
|
28
28
|
info = line[8]
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
29
|
+
if stop >= genome_size:
|
|
30
|
+
extra_stop = stop - genome_size
|
|
31
|
+
corrected_stop = genome_size
|
|
32
|
+
if '-' in strand: # Reverse Complement starts and stops adjusted
|
|
33
|
+
r_start = genome_size - corrected_stop
|
|
34
|
+
r_stop = genome_size - start
|
|
35
|
+
seq = genome_rev[r_start:r_stop + 1]
|
|
36
|
+
extra_seq = genome_rev[-extra_stop - 1:]
|
|
37
|
+
seq = extra_seq+seq
|
|
38
|
+
startCodon = seq[:3]
|
|
39
|
+
stopCodon = seq[-3:]
|
|
40
|
+
elif '+' in strand:
|
|
41
|
+
seq = genome[start -1 :corrected_stop]
|
|
42
|
+
extra_seq = genome[:extra_stop +1]
|
|
43
|
+
seq = seq+extra_seq
|
|
44
|
+
startCodon = seq[:3]
|
|
45
|
+
stopCodon = seq[-3:]
|
|
46
|
+
else:
|
|
47
|
+
if '-' in strand: # Reverse Complement starts and stops adjusted
|
|
48
|
+
r_start = genome_size - stop
|
|
49
|
+
r_stop = genome_size - start
|
|
50
|
+
startCodon = genome_rev[r_start:r_start + 3]
|
|
51
|
+
stopCodon = genome_rev[r_stop - 2:r_stop + 1]
|
|
52
|
+
elif '+' in strand:
|
|
53
|
+
startCodon = genome[start - 1:start + 2]
|
|
54
|
+
stopCodon = genome[stop - 3:stop]
|
|
38
55
|
po = str(start) + ',' + str(stop)
|
|
39
56
|
orf = [strand, startCodon, stopCodon, line[2],info] # This needs to detect the type
|
|
40
57
|
GFF_ORFs.update({po: orf})
|
ORForise/utils.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: ORForise
|
|
3
|
-
Version: 1.4.1
|
|
3
|
+
Version: 1.4.2
|
|
4
4
|
Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
|
|
5
5
|
Home-page: https://github.com/NickJD/ORForise
|
|
6
6
|
Author: Nicholas Dimonaco
|
|
@@ -62,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
62
62
|
usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
|
|
63
63
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
64
64
|
|
|
65
|
-
ORForise v1.4.
|
|
65
|
+
ORForise v1.4.2: Annotation-Compare Run Parameters.
|
|
66
66
|
|
|
67
67
|
Required Arguments:
|
|
68
68
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -112,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
112
112
|
usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
|
|
113
113
|
[-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
|
|
114
114
|
|
|
115
|
-
ORForise v1.4.
|
|
115
|
+
ORForise v1.4.2: Aggregate-Compare Run Parameters.
|
|
116
116
|
|
|
117
117
|
Required Arguments:
|
|
118
118
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -266,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
266
266
|
usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
|
|
267
267
|
OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
|
|
268
268
|
|
|
269
|
-
ORForise v1.4.
|
|
269
|
+
ORForise v1.4.2: GFF-Adder Run Parameters.
|
|
270
270
|
|
|
271
271
|
Required Arguments:
|
|
272
272
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -328,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
|
|
|
328
328
|
usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
|
|
329
329
|
ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
|
|
330
330
|
|
|
331
|
-
ORForise v1.4.
|
|
331
|
+
ORForise v1.4.2: GFF-Intersector Run Parameters.
|
|
332
332
|
|
|
333
333
|
Required Arguments:
|
|
334
334
|
-dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
|
|
@@ -1,11 +1,11 @@
|
|
|
1
1
|
ORForise/Aggregate_Compare.py,sha256=cY0PdA_SnywPcqwPomXmEHaZ6OUDS9k_QeLtXnewjiA,10648
|
|
2
|
-
ORForise/Annotation_Compare.py,sha256=
|
|
3
|
-
ORForise/Comparator.py,sha256=
|
|
2
|
+
ORForise/Annotation_Compare.py,sha256=6y_RiJg0q9g4Bcwy8Lxi5gSDkMLwm6uYJG2evxnKAhU,10228
|
|
3
|
+
ORForise/Comparator.py,sha256=AEpZQ8IURgYrWLKRRQEBUp3nFWKsxTb0f3O6XdHfRAc,45041
|
|
4
4
|
ORForise/GFF_Adder.py,sha256=-BlF6DQWcbhyYT88M0ZkoaWA2YDDxsby-7jksfeJN1Q,14057
|
|
5
5
|
ORForise/GFF_Intersector.py,sha256=EcDKyJr_47066kma2CguMf3uwzB2tYomPDFjmoX8IoU,9900
|
|
6
6
|
ORForise/StORForise.py,sha256=2QU6q3wPK6iqtyKg2jEVwFTB4bSymyc-mSpk7T8yNaY,5431
|
|
7
7
|
ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
ORForise/utils.py,sha256=
|
|
8
|
+
ORForise/utils.py,sha256=BeYOERE3UfBXpazmLDOQDzXj-bGbXd9oooWyPC1Ts1s,1099
|
|
9
9
|
ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
10
|
ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
|
|
11
11
|
ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
|
|
@@ -26,7 +26,7 @@ ORForise/Tools/FGENESB/FGENESB.py,sha256=TCvsGzfZ41tKkgF6TaBFpsuZBrueSygmoBco7d6
|
|
|
26
26
|
ORForise/Tools/FGENESB/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
27
27
|
ORForise/Tools/FragGeneScan/FragGeneScan.py,sha256=l3lqIxRUEx7lIV8Odhm6NsTgfHTrriYXcFoA4WW-E-E,1376
|
|
28
28
|
ORForise/Tools/FragGeneScan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
|
-
ORForise/Tools/GFF/GFF.py,sha256=
|
|
29
|
+
ORForise/Tools/GFF/GFF.py,sha256=RF-PtryGTV0Lgz6sT7L5idVEwCF_MP0prIcfaUYCoAQ,2806
|
|
30
30
|
ORForise/Tools/GFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
31
|
ORForise/Tools/GLIMMER_3/GLIMMER_3.py,sha256=9WQNSdlhQOpHQ4zcxncrTb2Lt6tiUB8Y0FBoyGxG_Yc,1723
|
|
32
32
|
ORForise/Tools/GLIMMER_3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -60,9 +60,9 @@ ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa
|
|
|
60
60
|
ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
|
|
61
61
|
ORForise/Tools/TransDecoder/TransDecoder.py,sha256=utnL52il6BGbbBxoizYPnY1qwBGeslYDCa5xU9RGWPg,1384
|
|
62
62
|
ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
ORForise-1.4.
|
|
64
|
-
ORForise-1.4.
|
|
65
|
-
ORForise-1.4.
|
|
66
|
-
ORForise-1.4.
|
|
67
|
-
ORForise-1.4.
|
|
68
|
-
ORForise-1.4.
|
|
63
|
+
ORForise-1.4.2.dist-info/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
|
|
64
|
+
ORForise-1.4.2.dist-info/METADATA,sha256=kv8pem6rn0yrjNtc9Gkm-RZvWsafVx866aCjUIdti5c,36457
|
|
65
|
+
ORForise-1.4.2.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
|
|
66
|
+
ORForise-1.4.2.dist-info/entry_points.txt,sha256=ss2cbLmljRmLIeZ3t48p_06NuQuRiKeA11IOUYg_uiY,246
|
|
67
|
+
ORForise-1.4.2.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
|
|
68
|
+
ORForise-1.4.2.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|