ORForise 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- ORForise/Aggregate_Compare.py +2 -4
- ORForise/Annotation_Compare.py +16 -53
- ORForise/Annotation_Intersector.py +726 -0
- ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
- ORForise/Convert_To_GFF.py +139 -0
- ORForise/GFF_Adder.py +454 -179
- ORForise/List_Tools.py +63 -0
- ORForise/StORForise.py +8 -4
- ORForise/Tools/EasyGene/EasyGene.py +13 -1
- ORForise/Tools/{GLIMMER_3/GLIMMER_3.py → GLIMMER3/GLIMMER3.py} +2 -2
- ORForise/Tools/GLIMMER3/__init__.py +0 -0
- ORForise/Tools/{GeneMark_HA/GeneMark_HA.py → GeneMarkHA/GeneMarkHA.py} +1 -1
- ORForise/Tools/GeneMarkHA/__init__.py +0 -0
- ORForise/Tools/Prodigal/Prodigal.py +13 -1
- ORForise/utils.py +4 -1
- orforise-1.6.1.dist-info/METADATA +1038 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/RECORD +29 -24
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/entry_points.txt +6 -2
- ORForise/GFF_Intersector.py +0 -192
- orforise-1.5.1.dist-info/METADATA +0 -427
- /ORForise/{Tools → Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
- /ORForise/{Tools/GLIMMER_3 → Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
- /ORForise/{Tools → Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
- /ORForise/{Tools/GeneMark_HA → Aux/StORF_Undetected}/__init__.py +0 -0
- /ORForise/{Tools/StORF_Undetected/Completely_Undetected → Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
- /ORForise/{Tools → Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
- /ORForise/{Tools/StORF_Undetected → Aux/TabToGFF}/__init__.py +0 -0
- /ORForise/{Tools/StORF_Undetected/unvitiated_Genes → Aux}/__init__.py +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/WHEEL +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/licenses/LICENSE +0 -0
- {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/top_level.txt +0 -0
|
@@ -1,11 +1,22 @@
|
|
|
1
|
-
ORForise/Aggregate_Compare.py,sha256=
|
|
2
|
-
ORForise/Annotation_Compare.py,sha256=
|
|
1
|
+
ORForise/Aggregate_Compare.py,sha256=kf9O_W3520To9yi2zUtmNmDfWZ2oIGr9rfdpbzg699o,22767
|
|
2
|
+
ORForise/Annotation_Compare.py,sha256=5IbaZX9OCkeP90tDYXLqV20KTsuNCHST1-eqbWjuid8,18436
|
|
3
|
+
ORForise/Annotation_Intersector.py,sha256=8nXMCdEifpLAowtTfD1zUibwpOaS4Y3fRkji_3umfVo,34458
|
|
3
4
|
ORForise/Comparator.py,sha256=hvoLppG4tq7iBrIJMmm-ckIpRoLYEryTRQrASNWAjs0,48062
|
|
4
|
-
ORForise/
|
|
5
|
-
ORForise/
|
|
6
|
-
ORForise/
|
|
5
|
+
ORForise/Convert_To_GFF.py,sha256=V1tg-qT6m7uKFgNNNVPK-bYjFTnSdREJd7y92wrGnvM,5889
|
|
6
|
+
ORForise/GFF_Adder.py,sha256=n-AF-JQv6sL46qJEY761EoG4tG5yzqr30r2Zk0v-DMg,28537
|
|
7
|
+
ORForise/List_Tools.py,sha256=6WxP_j6tLWpJ4-obcgM5pEL__C8eWx0rJnsMHY1jWtM,1666
|
|
8
|
+
ORForise/StORForise.py,sha256=jXa-QmN3tIkN5qiqJcFufIY5VuxwU1k99vppGPWIOa4,5627
|
|
7
9
|
ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
8
|
-
ORForise/utils.py,sha256=
|
|
10
|
+
ORForise/utils.py,sha256=sEDOATM5v50g9sKbBf2QinaCnGel7EtnKiR-UebtTwQ,15751
|
|
11
|
+
ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
|
|
13
|
+
ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
14
|
+
ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
|
|
15
|
+
ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
17
|
+
ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
|
|
18
|
+
ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
|
|
19
|
+
ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
9
20
|
ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
10
21
|
ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
|
|
11
22
|
ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
|
|
@@ -20,7 +31,7 @@ ORForise/Tools/Augustus/Augustus.py,sha256=rEZ3h3eHrCfMFDorXxF5h0j4Wr5vTHG_rrQ-1
|
|
|
20
31
|
ORForise/Tools/Augustus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
21
32
|
ORForise/Tools/Balrog/Balrog.py,sha256=wrxQe7Df-iYUq3IQvX8A9GzDy5qR9rt5LHkDnDUngKc,1768
|
|
22
33
|
ORForise/Tools/Balrog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
23
|
-
ORForise/Tools/EasyGene/EasyGene.py,sha256=
|
|
34
|
+
ORForise/Tools/EasyGene/EasyGene.py,sha256=FCvcmL6w_ytefSNvWfWXIG6h0BKObPtTcCp0iGUSdH0,2330
|
|
24
35
|
ORForise/Tools/EasyGene/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
25
36
|
ORForise/Tools/FGENESB/FGENESB.py,sha256=3Jxe2DzUTG77wllSJpN__c_4cdl_gcj2idLXNMkv1Cs,1871
|
|
26
37
|
ORForise/Tools/FGENESB/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -28,12 +39,12 @@ ORForise/Tools/FragGeneScan/FragGeneScan.py,sha256=ofywMVF-FBM4s3FPwoWsJKQUX0T_i
|
|
|
28
39
|
ORForise/Tools/FragGeneScan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
29
40
|
ORForise/Tools/GFF/GFF.py,sha256=Z9xPCWNXrmRVvBR9_PNlajQz8ZYFHvOdXwCskXR1XhI,3219
|
|
30
41
|
ORForise/Tools/GFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
31
|
-
ORForise/Tools/
|
|
32
|
-
ORForise/Tools/
|
|
42
|
+
ORForise/Tools/GLIMMER3/GLIMMER3.py,sha256=AP5FttPNsGBKrkvT-1EXO66P1LfHELj9rCo3lNWoqQk,2122
|
|
43
|
+
ORForise/Tools/GLIMMER3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
33
44
|
ORForise/Tools/GeneMark/GeneMark.py,sha256=SeovWnoLy7Ktkc37TXLjWWUmmgEMvip7j2XIe5fiYaA,5815
|
|
34
45
|
ORForise/Tools/GeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
-
ORForise/Tools/
|
|
36
|
-
ORForise/Tools/
|
|
46
|
+
ORForise/Tools/GeneMarkHA/GeneMarkHA.py,sha256=FUBYsscSR9Uc1dM5kXKWtu-HJ1s2Dmn2cBapAT0t308,1736
|
|
47
|
+
ORForise/Tools/GeneMarkHA/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
48
|
ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py,sha256=HK1SWj-M_9AWngMkkWOXQf6sr__kvON8ZL_wYRTMEzk,1753
|
|
38
49
|
ORForise/Tools/GeneMark_HMM/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
39
50
|
ORForise/Tools/GeneMark_S/GeneMark_S.py,sha256=56FQ-u-uvZFN41Ii0tGCUuBWsZaPaxvigbuOVg_4QCw,1722
|
|
@@ -46,23 +57,17 @@ ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py,sha256=pfQgzwwBz54kVsLWH7G
|
|
|
46
57
|
ORForise/Tools/MetaGeneAnnotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
47
58
|
ORForise/Tools/MetaGeneMark/MetaGeneMark.py,sha256=_JMGtHvuX-qM-PSFI6EV91Jm86DWluukwGq7lFFCCSo,1848
|
|
48
59
|
ORForise/Tools/MetaGeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
-
ORForise/Tools/Prodigal/Prodigal.py,sha256=
|
|
60
|
+
ORForise/Tools/Prodigal/Prodigal.py,sha256=DwcZmYI0MSrBj180Yj_S-jq3lIYbVUMrf0PQpjsUSx8,2324
|
|
50
61
|
ORForise/Tools/Prodigal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
51
62
|
ORForise/Tools/Prokka/Prokka.py,sha256=-wKNDcZTbnUpqeqlc7VvXcC0KnwMZ4BduWAlH1p8ULU,1887
|
|
52
63
|
ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
53
64
|
ORForise/Tools/StORF_Reporter/StORF_Reporter.py,sha256=areqA94r6nU3GOodnl4QzQbnkMd1XRve0SWn11XoOec,1993
|
|
54
65
|
ORForise/Tools/StORF_Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
55
|
-
ORForise/Tools/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
|
|
56
|
-
ORForise/Tools/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
57
|
-
ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
|
|
58
|
-
ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
59
|
-
ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
60
|
-
ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
|
|
61
66
|
ORForise/Tools/TransDecoder/TransDecoder.py,sha256=YlYxxicuP8xjwNkAKbHOdfaurvOHH0whYxaiB6B2kjs,1778
|
|
62
67
|
ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
63
|
-
orforise-1.
|
|
64
|
-
orforise-1.
|
|
65
|
-
orforise-1.
|
|
66
|
-
orforise-1.
|
|
67
|
-
orforise-1.
|
|
68
|
-
orforise-1.
|
|
68
|
+
orforise-1.6.1.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
|
|
69
|
+
orforise-1.6.1.dist-info/METADATA,sha256=swyauHPeW5wTZ72-k4BM_xUSwwBvsGrZHUKA_VyEGjc,59554
|
|
70
|
+
orforise-1.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
71
|
+
orforise-1.6.1.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
|
|
72
|
+
orforise-1.6.1.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
|
|
73
|
+
orforise-1.6.1.dist-info/RECORD,,
|
|
@@ -1,11 +1,15 @@
|
|
|
1
1
|
[console_scripts]
|
|
2
2
|
Aggregate-Compare = ORForise.Aggregate_Compare:main
|
|
3
3
|
Annotation-Compare = ORForise.Annotation_Compare:main
|
|
4
|
+
Annotation-Intersector = ORForise.Annotation_Intersector:main
|
|
5
|
+
Convert-To-GFF = ORForise.Convert_To_GFF:main
|
|
4
6
|
GFF-Adder = ORForise.GFF_Adder:main
|
|
5
|
-
|
|
7
|
+
List-Tools = ORForise.List_Tools:main
|
|
6
8
|
StORForise = ORForise.StORForise:main
|
|
7
9
|
aggregate-compare = ORForise.Aggregate_Compare:main
|
|
8
10
|
annotation-compare = ORForise.Annotation_Compare:main
|
|
11
|
+
annotation-intersector = ORForise.Annotation_Intersector:main
|
|
12
|
+
convert-to-gff = ORForise.Convert_To_GFF:main
|
|
9
13
|
gff-adder = ORForise.GFF_Adder:main
|
|
10
|
-
|
|
14
|
+
list-tools = ORForise.List_Tools:main
|
|
11
15
|
storforise = ORForise.StORForise:main
|
ORForise/GFF_Intersector.py
DELETED
|
@@ -1,192 +0,0 @@
|
|
|
1
|
-
from importlib import import_module
|
|
2
|
-
import argparse
|
|
3
|
-
import collections
|
|
4
|
-
from datetime import date
|
|
5
|
-
import sys
|
|
6
|
-
try:
|
|
7
|
-
from utils import *
|
|
8
|
-
except ImportError:
|
|
9
|
-
from .utils import *
|
|
10
|
-
|
|
11
|
-
################################
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
def gff_writer(genome_ID, genome_DNA, reference_annotation, reference_tool, ref_gene_set, additional_annotation, additional_tool, genes_To_Keep, output_file):
    """Write the intersected genes to ``output_file`` as tab-separated GFF lines.

    Parameters:
        genome_ID: value written in the first column of every record.
        genome_DNA: path of the genome FASTA; only echoed in the header.
        reference_annotation: path of the reference annotation; echoed in the
            header, and its basename (up to the first '.') is used as the
            second column of every record.
        reference_tool, ref_gene_set, additional_tool: accepted for interface
            compatibility; not read by this function.
        additional_annotation: path of the intersecting annotation; only
            echoed in the header.
        genes_To_Keep: mapping of "start,stop" strings to lists laid out as
            [strand, coverage, type, <unused>, info_a, info_b].
        output_file: path of the file to create (overwritten if it exists).

    Side effects:
        Elements 4 and 5 of every value in ``genes_To_Keep`` are rewritten in
        place with newlines and 'ID=' removed (kept from the original
        behaviour so callers see the same cleaned values).
    """
    # The source column is the same for every record, so compute it once.
    ref = reference_annotation.split('/')[-1].split('.')[0]

    # The context manager guarantees the handle is flushed and closed even if a
    # malformed entry raises part-way through (the file was previously left open).
    with open(output_file, 'w') as write_out:
        write_out.write("##gff-version\t3\n#\tGFF-Intersector\n#\tRun Date:" + str(date.today()) + '\n')
        write_out.write("##Genome DNA File:" + genome_DNA + '\n')
        write_out.write("##Original File: " + reference_annotation + "\n##Intersecting File: " + additional_annotation + '\n')
        for pos, data in genes_To_Keep.items():
            pos_ = pos.split(',')
            start = pos_[0]
            stop = pos_[-1]
            strand = data[0]
            data[4] = data[4].replace('\n', '').replace('ID=', '')
            data[5] = data[5].replace('\n', '').replace('ID=', '')
            # NOTE(review): comparator() stores the additional annotation's info at
            # index 4 and the reference's at index 5, so the two attribute labels
            # below look swapped. Left unchanged to keep the output stable - confirm.
            entry = (
                genome_ID + '\t' + ref + '\t' + data[2] + '\t' + start + '\t' + stop + '\t.\t' + strand
                + '\t.\tID=Original_Annotation=' + data[4] + ';Additional_Annotation=' + data[5]
                + ';Coverage=' + str(data[1]) + '\n')
            write_out.write(entry)
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
def _load_tool(tool_name):
    """Import and return the parser callable named ``tool_name``.

    ``Tools.<tool_name>.<tool_name>`` is tried first and
    ``ORForise.Tools.<tool_name>.<tool_name>`` second; the attribute called
    ``tool_name`` is then fetched from whichever module imported. Exits with
    "Tool not available" when neither module can be found.
    """
    module_path = tool_name + '.' + tool_name
    # Both names are absolute, so import_module needs no ``package`` anchor.
    try:
        tool_module = import_module('Tools.' + module_path)
    except ModuleNotFoundError:
        try:
            tool_module = import_module('ORForise.Tools.' + module_path)
        except ModuleNotFoundError:
            sys.exit("Tool not available")
    return getattr(tool_module, tool_name)


def comparator(options):  # Only works for single contig genome
    """Intersect an additional annotation with a reference and write the result.

    Reads these attributes of ``options``: ``genome_DNA``, ``reference_tool``,
    ``reference_annotation``, ``gene_ident``, ``additional_tool``,
    ``additional_annotation``, ``coverage`` and ``output_file``.

    Steps:
        1. Read the FASTA, concatenating all sequence lines and remembering the
           identifier from the last '>' header seen.
        2. Build ``ref_genes`` ("start,stop" -> [strand, 'ref', type, info])
           either by parsing the reference as tab-separated GFF or by calling
           the tool parser named by ``options.reference_tool``.
        3. Parse the additional annotation with ``options.additional_tool``.
        4. Keep reference genes matched by an additional prediction of the same
           type: identical coordinates when ``coverage == 100``; otherwise same
           strand, stop positions a multiple of 3 apart, and at least
           ``coverage`` percent of the reference gene overlapped.
        5. Sort the kept genes with ``sortORFs`` and write them via ``gff_writer``.

    Exits via ``sys.exit`` when a tool module cannot be imported or when the
    FASTA has no header line.
    """
    genome_ID = None
    seq_parts = []
    with open(options.genome_DNA, 'r') as genome_fasta:
        for line in genome_fasta:
            line = line.replace("\n", "")
            if not line.startswith('>'):
                seq_parts.append(line)
            else:
                genome_ID = line.split()[0].replace('>', '')
    genome_seq = "".join(seq_parts)
    if genome_ID is None:
        # Previously this surfaced as a NameError only after all processing was done.
        sys.exit("Genome DNA file contains no FASTA header line (no line starting with '>')")
    ###########################################
    if not options.reference_tool:  # Parse the reference directly as GFF
        gene_ident = options.gene_ident
        # For the string supplied by argparse this is never true (a one-character
        # string cannot contain 'CDS'); it only holds for a one-element sequence
        # such as ['CDS'] passed by a programmatic caller.
        cds_only = 'CDS' in gene_ident and len(gene_ident) == 1
        if isinstance(gene_ident, str):
            gene_types = gene_ident.split(',')
        else:
            gene_types = list(gene_ident)
        ref_genes = collections.OrderedDict()  # Order is important
        with open(options.reference_annotation, 'r') as genome_gff:
            for line in genome_gff:
                fields = line.split('\t')
                try:
                    feature = fields[2]
                    if cds_only:
                        if "CDS" not in feature or len(fields) != 9:
                            continue
                        feature = 'CDS'
                    elif not any(gene_type in feature for gene_type in gene_types):
                        continue
                    start = int(fields[3])
                    stop = int(fields[4])
                    strand = fields[6]
                    info = fields[8]
                except IndexError:
                    # Comment/header lines and short records lack the needed columns.
                    continue
                # Report what type of gene/rRNA etc we have here
                ref_genes[str(start) + ',' + str(stop)] = [strand, 'ref', feature, info]
    else:  # Use a tool-specific parser for the reference
        reference_tool_ = _load_tool(options.reference_tool)
        # NOTE(review): the reference parser is called with two arguments while the
        # additional parser below receives three - confirm the tools accept both.
        ref_genes = reference_tool_(options.reference_annotation, genome_seq)
    ref_gene_set = list(ref_genes.keys())
    ############################## Get Add'l
    additional_tool_ = _load_tool(options.additional_tool)
    additional_orfs = additional_tool_(options.additional_annotation, genome_seq, options.gene_ident)
    ##############################
    genes_To_Keep = collections.OrderedDict()

    if options.coverage == 100:
        # Exact-match mode: a prediction is kept only if the reference holds a
        # gene with identical coordinates and the same feature type.
        for orf, data in additional_orfs.items():
            o_Start = int(orf.split(',')[0])
            o_Stop = int(orf.split(',')[1])
            o_Strand = data[0]
            additional_type = data[3]
            additional_info = data[4]
            key = str(o_Start) + ',' + str(o_Stop)
            try:
                ref_entry = ref_genes[key]
                ref_type = ref_entry[2]
                ref_info = ref_entry[3]
            except KeyError:
                continue
            if additional_type == ref_type:
                # o_ and g_ would be the same here
                genes_To_Keep[key] = [o_Strand, options.coverage, additional_type, ref_type,
                                      additional_info, ref_info]
    else:
        for orf, data in additional_orfs.items():  # Currently allows ORF to be bigger than Gene
            o_Start = int(orf.split(',')[0])
            o_Stop = int(orf.split(',')[1])
            o_Strand = data[0]
            additional_type = data[3]
            additional_info = data[4]
            for gene, r_data in ref_genes.items():
                g_Start = int(gene.split(',')[0])
                g_Stop = int(gene.split(',')[1])
                g_Strand = r_data[0]
                # Inclusive-interval arithmetic gives the same counts as intersecting
                # per-base position sets, without materialising a set per pair.
                overlap = max(0, min(o_Stop, g_Stop) - max(o_Start, g_Start) + 1)
                gene_len = max(0, g_Stop - g_Start + 1)
                # A zero-length reference gene cannot be covered; avoid dividing by zero.
                cov = 100 * float(overlap) / float(gene_len) if gene_len else 0.0

                ref_type = r_data[2]
                ref_info = r_data[3]

                if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and cov >= options.coverage:
                    if additional_type == ref_type:
                        genes_To_Keep[str(g_Start) + ',' + str(g_Stop)] = [
                            g_Strand, int(cov), additional_type, ref_type, additional_info, ref_info]
                # NOTE(review): stopping here presumes ref_genes iterates in ascending
                # start order - confirm for tool-produced references.
                if g_Start > o_Stop:
                    break
    #########################################################
    genes_To_Keep = sortORFs(genes_To_Keep)
    gff_writer(genome_ID, options.genome_DNA, options.reference_annotation, options.reference_tool, ref_gene_set,
               options.additional_annotation, options.additional_tool, genes_To_Keep, options.output_file)
|
|
155
|
-
|
|
156
|
-
def main():
    """Command-line entry point for GFF-Intersector.

    Prints the ORForise banner, declares five required and three optional
    flags, parses the command line and hands the resulting namespace to
    ``comparator``.
    """
    print("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n#####")

    cli = argparse.ArgumentParser(
        description='ORForise ' + ORForise_Version + ': GFF-Intersector Run Parameters.')
    # Removes the last entry of the parser's (private) list of argument groups
    # before the two custom groups below are registered.
    cli._action_groups.pop()

    # (flag, destination attribute, help text) - declaration order is preserved.
    required_specs = (
        ('-dna', 'genome_DNA', 'Genome DNA file (.fa) which both annotations are based on'),
        ('-ref', 'reference_annotation', 'Which reference annotation file to use as reference?'),
        ('-at', 'additional_tool', 'Which format to use for additional annotation?'),
        ('-add', 'additional_annotation', 'Which annotation file to add to reference annotation?'),
        ('-o', 'output_file', 'Output filename'),
    )
    required_group = cli.add_argument_group('Required Arguments')
    for flag, destination, help_text in required_specs:
        required_group.add_argument(flag, dest=destination, required=True, help=help_text)

    # (flag, keyword arguments) for the optional flags.
    optional_specs = (
        ('-rt', {
            'dest': 'reference_tool',
            'help': 'Which tool format to use as reference? - If not provided, will default to the '
                    'standard GFF format and will only look for "CDS" features',
        }),
        ('-gi', {
            'dest': 'gene_ident',
            'default': 'CDS',
            'help': 'Identifier used for extraction of "genic" regions from reference annotation '
                    '"CDS,rRNA,tRNA": Default for is "CDS" ',
        }),
        ('-cov', {
            'dest': 'coverage',
            'default': 100,
            'type': int,
            'help': 'Percentage coverage of reference annotation needed to confirm intersection'
                    ' - Default: 100 == exact match',
        }),
    )
    optional_group = cli.add_argument_group('Optional Arguments')
    for flag, keywords in optional_specs:
        optional_group.add_argument(flag, required=False, **keywords)

    comparator(cli.parse_args())
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
# Script entry point: run the command-line interface only when this file is
# executed directly, then report completion.
# NOTE(review): indentation was lost in this rendering; print("Complete") is
# assumed to sit inside the guard - confirm against the original file.
if __name__ == "__main__":
    main()
    print("Complete")
|