ORForise 1.5.1__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (31) hide show
  1. ORForise/Aggregate_Compare.py +2 -4
  2. ORForise/Annotation_Compare.py +16 -53
  3. ORForise/Annotation_Intersector.py +726 -0
  4. ORForise/Aux/TabToGFF/TabToGFF.py +140 -0
  5. ORForise/Convert_To_GFF.py +139 -0
  6. ORForise/GFF_Adder.py +454 -179
  7. ORForise/List_Tools.py +63 -0
  8. ORForise/StORForise.py +8 -4
  9. ORForise/Tools/EasyGene/EasyGene.py +13 -1
  10. ORForise/Tools/{GLIMMER_3/GLIMMER_3.py → GLIMMER3/GLIMMER3.py} +2 -2
  11. ORForise/Tools/GLIMMER3/__init__.py +0 -0
  12. ORForise/Tools/{GeneMark_HA/GeneMark_HA.py → GeneMarkHA/GeneMarkHA.py} +1 -1
  13. ORForise/Tools/GeneMarkHA/__init__.py +0 -0
  14. ORForise/Tools/Prodigal/Prodigal.py +13 -1
  15. ORForise/utils.py +4 -1
  16. orforise-1.6.1.dist-info/METADATA +1038 -0
  17. {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/RECORD +29 -24
  18. {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/entry_points.txt +6 -2
  19. ORForise/GFF_Intersector.py +0 -192
  20. orforise-1.5.1.dist-info/METADATA +0 -427
  21. /ORForise/{Tools → Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
  22. /ORForise/{Tools/GLIMMER_3 → Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
  23. /ORForise/{Tools → Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
  24. /ORForise/{Tools/GeneMark_HA → Aux/StORF_Undetected}/__init__.py +0 -0
  25. /ORForise/{Tools/StORF_Undetected/Completely_Undetected → Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
  26. /ORForise/{Tools → Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
  27. /ORForise/{Tools/StORF_Undetected → Aux/TabToGFF}/__init__.py +0 -0
  28. /ORForise/{Tools/StORF_Undetected/unvitiated_Genes → Aux}/__init__.py +0 -0
  29. {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/WHEEL +0 -0
  30. {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/licenses/LICENSE +0 -0
  31. {orforise-1.5.1.dist-info → orforise-1.6.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,22 @@
1
- ORForise/Aggregate_Compare.py,sha256=WzP34E4YqkOBXlE9obZfPf3Sp1Gwl40WPqE7PsGntqk,22977
2
- ORForise/Annotation_Compare.py,sha256=uMf_FDeFMYjiJj42_-PsglHwVPkfZvsOiQ3IZpP2DgU,21585
1
+ ORForise/Aggregate_Compare.py,sha256=kf9O_W3520To9yi2zUtmNmDfWZ2oIGr9rfdpbzg699o,22767
2
+ ORForise/Annotation_Compare.py,sha256=5IbaZX9OCkeP90tDYXLqV20KTsuNCHST1-eqbWjuid8,18436
3
+ ORForise/Annotation_Intersector.py,sha256=8nXMCdEifpLAowtTfD1zUibwpOaS4Y3fRkji_3umfVo,34458
3
4
  ORForise/Comparator.py,sha256=hvoLppG4tq7iBrIJMmm-ckIpRoLYEryTRQrASNWAjs0,48062
4
- ORForise/GFF_Adder.py,sha256=-BlF6DQWcbhyYT88M0ZkoaWA2YDDxsby-7jksfeJN1Q,14057
5
- ORForise/GFF_Intersector.py,sha256=EcDKyJr_47066kma2CguMf3uwzB2tYomPDFjmoX8IoU,9900
6
- ORForise/StORForise.py,sha256=2QU6q3wPK6iqtyKg2jEVwFTB4bSymyc-mSpk7T8yNaY,5431
5
+ ORForise/Convert_To_GFF.py,sha256=V1tg-qT6m7uKFgNNNVPK-bYjFTnSdREJd7y92wrGnvM,5889
6
+ ORForise/GFF_Adder.py,sha256=n-AF-JQv6sL46qJEY761EoG4tG5yzqr30r2Zk0v-DMg,28537
7
+ ORForise/List_Tools.py,sha256=6WxP_j6tLWpJ4-obcgM5pEL__C8eWx0rJnsMHY1jWtM,1666
8
+ ORForise/StORForise.py,sha256=jXa-QmN3tIkN5qiqJcFufIY5VuxwU1k99vppGPWIOa4,5627
7
9
  ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
8
- ORForise/utils.py,sha256=zCE4pGtt3FxptolAxF9cPC5oYjYArTB6HMCZSkiGHvE,15548
10
+ ORForise/utils.py,sha256=sEDOATM5v50g9sKbBf2QinaCnGel7EtnKiR-UebtTwQ,15751
11
+ ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
13
+ ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
15
+ ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
18
+ ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
19
+ ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
20
  ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
21
  ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
11
22
  ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
@@ -20,7 +31,7 @@ ORForise/Tools/Augustus/Augustus.py,sha256=rEZ3h3eHrCfMFDorXxF5h0j4Wr5vTHG_rrQ-1
20
31
  ORForise/Tools/Augustus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
21
32
  ORForise/Tools/Balrog/Balrog.py,sha256=wrxQe7Df-iYUq3IQvX8A9GzDy5qR9rt5LHkDnDUngKc,1768
22
33
  ORForise/Tools/Balrog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
23
- ORForise/Tools/EasyGene/EasyGene.py,sha256=_1gGRYulpnhgB2xL7ZsnkXF4U8O9XgC0XQTZq7XubC4,1752
34
+ ORForise/Tools/EasyGene/EasyGene.py,sha256=FCvcmL6w_ytefSNvWfWXIG6h0BKObPtTcCp0iGUSdH0,2330
24
35
  ORForise/Tools/EasyGene/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
25
36
  ORForise/Tools/FGENESB/FGENESB.py,sha256=3Jxe2DzUTG77wllSJpN__c_4cdl_gcj2idLXNMkv1Cs,1871
26
37
  ORForise/Tools/FGENESB/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -28,12 +39,12 @@ ORForise/Tools/FragGeneScan/FragGeneScan.py,sha256=ofywMVF-FBM4s3FPwoWsJKQUX0T_i
28
39
  ORForise/Tools/FragGeneScan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
29
40
  ORForise/Tools/GFF/GFF.py,sha256=Z9xPCWNXrmRVvBR9_PNlajQz8ZYFHvOdXwCskXR1XhI,3219
30
41
  ORForise/Tools/GFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
31
- ORForise/Tools/GLIMMER_3/GLIMMER_3.py,sha256=McFulHAHV4e3ROVmTn0JSz-r0TTqiEor0MsemsIkSjc,2124
32
- ORForise/Tools/GLIMMER_3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ ORForise/Tools/GLIMMER3/GLIMMER3.py,sha256=AP5FttPNsGBKrkvT-1EXO66P1LfHELj9rCo3lNWoqQk,2122
43
+ ORForise/Tools/GLIMMER3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
33
44
  ORForise/Tools/GeneMark/GeneMark.py,sha256=SeovWnoLy7Ktkc37TXLjWWUmmgEMvip7j2XIe5fiYaA,5815
34
45
  ORForise/Tools/GeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
- ORForise/Tools/GeneMark_HA/GeneMark_HA.py,sha256=nfMEAszBEZw4zhhW3VtZt6yqJprXkydVAR1a3RrAm1k,1737
36
- ORForise/Tools/GeneMark_HA/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ ORForise/Tools/GeneMarkHA/GeneMarkHA.py,sha256=FUBYsscSR9Uc1dM5kXKWtu-HJ1s2Dmn2cBapAT0t308,1736
47
+ ORForise/Tools/GeneMarkHA/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
48
  ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py,sha256=HK1SWj-M_9AWngMkkWOXQf6sr__kvON8ZL_wYRTMEzk,1753
38
49
  ORForise/Tools/GeneMark_HMM/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
39
50
  ORForise/Tools/GeneMark_S/GeneMark_S.py,sha256=56FQ-u-uvZFN41Ii0tGCUuBWsZaPaxvigbuOVg_4QCw,1722
@@ -46,23 +57,17 @@ ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py,sha256=pfQgzwwBz54kVsLWH7G
46
57
  ORForise/Tools/MetaGeneAnnotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
47
58
  ORForise/Tools/MetaGeneMark/MetaGeneMark.py,sha256=_JMGtHvuX-qM-PSFI6EV91Jm86DWluukwGq7lFFCCSo,1848
48
59
  ORForise/Tools/MetaGeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
- ORForise/Tools/Prodigal/Prodigal.py,sha256=y8bnh9_A4DmTJAsKqD_1S_d9ak56ZXsnUiFhrGudNcw,1758
60
+ ORForise/Tools/Prodigal/Prodigal.py,sha256=DwcZmYI0MSrBj180Yj_S-jq3lIYbVUMrf0PQpjsUSx8,2324
50
61
  ORForise/Tools/Prodigal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
62
  ORForise/Tools/Prokka/Prokka.py,sha256=-wKNDcZTbnUpqeqlc7VvXcC0KnwMZ4BduWAlH1p8ULU,1887
52
63
  ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
53
64
  ORForise/Tools/StORF_Reporter/StORF_Reporter.py,sha256=areqA94r6nU3GOodnl4QzQbnkMd1XRve0SWn11XoOec,1993
54
65
  ORForise/Tools/StORF_Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
- ORForise/Tools/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
56
- ORForise/Tools/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
57
- ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
58
- ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
59
- ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
61
66
  ORForise/Tools/TransDecoder/TransDecoder.py,sha256=YlYxxicuP8xjwNkAKbHOdfaurvOHH0whYxaiB6B2kjs,1778
62
67
  ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
63
- orforise-1.5.1.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
64
- orforise-1.5.1.dist-info/METADATA,sha256=FiL1iRib1myMD_MeZ_vU3gl_7qE-sd40fXUGdzJKE0A,35341
65
- orforise-1.5.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
66
- orforise-1.5.1.dist-info/entry_points.txt,sha256=VXYTkaTIjYu4LhZjhRyCezsg7n9bNeG7W2l4FTwCopE,474
67
- orforise-1.5.1.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
68
- orforise-1.5.1.dist-info/RECORD,,
68
+ orforise-1.6.1.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
69
+ orforise-1.6.1.dist-info/METADATA,sha256=swyauHPeW5wTZ72-k4BM_xUSwwBvsGrZHUKA_VyEGjc,59554
70
+ orforise-1.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ orforise-1.6.1.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
72
+ orforise-1.6.1.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
73
+ orforise-1.6.1.dist-info/RECORD,,
@@ -1,11 +1,15 @@
1
1
  [console_scripts]
2
2
  Aggregate-Compare = ORForise.Aggregate_Compare:main
3
3
  Annotation-Compare = ORForise.Annotation_Compare:main
4
+ Annotation-Intersector = ORForise.Annotation_Intersector:main
5
+ Convert-To-GFF = ORForise.Convert_To_GFF:main
4
6
  GFF-Adder = ORForise.GFF_Adder:main
5
- GFF-Intersector = ORForise.GFF_Intersector:main
7
+ List-Tools = ORForise.List_Tools:main
6
8
  StORForise = ORForise.StORForise:main
7
9
  aggregate-compare = ORForise.Aggregate_Compare:main
8
10
  annotation-compare = ORForise.Annotation_Compare:main
11
+ annotation-intersector = ORForise.Annotation_Intersector:main
12
+ convert-to-gff = ORForise.Convert_To_GFF:main
9
13
  gff-adder = ORForise.GFF_Adder:main
10
- gff-intersector = ORForise.GFF_Intersector:main
14
+ list-tools = ORForise.List_Tools:main
11
15
  storforise = ORForise.StORForise:main
@@ -1,192 +0,0 @@
1
- from importlib import import_module
2
- import argparse
3
- import collections
4
- from datetime import date
5
- import sys
6
- try:
7
- from utils import *
8
- except ImportError:
9
- from .utils import *
10
-
11
- ################################
12
-
13
-
14
- def gff_writer(genome_ID, genome_DNA,reference_annotation, reference_tool, ref_gene_set, additional_annotation, additional_tool, genes_To_Keep, output_file):
15
- write_out = open(output_file, 'w')
16
- write_out.write("##gff-version\t3\n#\tGFF-Intersector\n#\tRun Date:" + str(date.today()) + '\n')
17
- write_out.write("##Genome DNA File:" + genome_DNA + '\n')
18
- write_out.write("##Original File: " + reference_annotation + "\n##Intersecting File: " + additional_annotation + '\n')
19
- for pos, data in genes_To_Keep.items():
20
- pos_ = pos.split(',')
21
- start = pos_[0]
22
- stop = pos_[-1]
23
- strand = data[0]
24
- ref = reference_annotation.split('/')[-1].split('.')[0]
25
- data[4] = data[4].replace('\n', '').replace('ID=','')
26
- data[5] = data[5].replace('\n', '').replace('ID=','')
27
- entry = (
28
- genome_ID + '\t' + ref + '\t' + data[2] + '\t' + start + '\t' + stop + '\t.\t' + strand + '\t.\tID=Original_Annotation=' + data[4] + ';Additional_Annotation=' + data[5] + ';Coverage=' + str(
29
- data[1]) + '\n')
30
- write_out.write(entry)
31
-
32
-
33
- def comparator(options): # Only works for single contig genome
34
- genome_seq = ""
35
- with open(options.genome_DNA, 'r') as genome_fasta:
36
- for line in genome_fasta:
37
- line = line.replace("\n", "")
38
- if not line.startswith('>'):
39
- genome_seq += str(line)
40
- else:
41
- genome_ID = line.split()[0].replace('>', '')
42
- ###########################################
43
- if not options.reference_tool: # IF using Ensembl for comparison
44
- ref_genes = collections.OrderedDict() # Order is important
45
- count = 0
46
- with open(options.reference_annotation, 'r') as genome_gff:
47
- for line in genome_gff:
48
- line = line.split('\t')
49
- try:
50
- if 'CDS' in options.gene_ident and len(options.gene_ident) == 1:
51
- if "CDS" in line[2] and len(line) == 9:
52
- start = int(line[3])
53
- stop = int(line[4])
54
- strand = line[6]
55
- pos = str(start) + ',' + str(stop)
56
- info = line[8]
57
- ref_genes.update({pos: [strand, 'ref', 'CDS',info]})
58
- count += 1
59
- else:
60
- gene_types = options.gene_ident.split(',')
61
- if any(gene_type in line[2] for gene_type in gene_types): # line[2] for normalrun
62
- start = int(line[3])
63
- stop = int(line[4])
64
- strand = line[6]
65
- pos = str(start) + ',' + str(stop)
66
- info = line[8]
67
- ref_genes.update(
68
- {pos: [strand, 'ref', line[2],info]}) # Report what type of gene/rRNA etc we have here
69
- count += 1
70
- except IndexError:
71
- continue
72
- else: # IF using a tool as reference
73
- try:
74
- reference_tool_ = import_module('Tools.' + options.reference_tool + '.' + options.reference_tool,
75
- package='my_current_pkg')
76
- except ModuleNotFoundError:
77
- try:
78
- reference_tool_ = import_module('ORForise.Tools.' + options.reference_tool + '.' + options.reference_tool,
79
- package='my_current_pkg')
80
- except ModuleNotFoundError:
81
- sys.exit("Tool not available")
82
- reference_tool_ = getattr(reference_tool_, options.reference_tool)
83
- ############ Reformatting tool output for ref_genes
84
- ref_genes = reference_tool_(options.reference_annotation, genome_seq)
85
- ref_gene_set = list(ref_genes.keys())
86
- ############################## Get Add'l
87
- try:
88
- additional_tool_ = import_module('Tools.' + options.additional_tool + '.' + options.additional_tool,
89
- package='my_current_pkg')
90
- except ModuleNotFoundError:
91
- try:
92
- additional_tool_ = import_module('ORForise.Tools.' + options.additional_tool + '.' + options.additional_tool,
93
- package='my_current_pkg')
94
- except ModuleNotFoundError:
95
- sys.exit("Tool not available")
96
- additional_tool_ = getattr(additional_tool_, options.additional_tool)
97
- additional_orfs = additional_tool_(options.additional_annotation,genome_seq,options.gene_ident)
98
- ##############################
99
-
100
-
101
- genes_To_Keep = collections.OrderedDict()
102
-
103
- if options.coverage == 100:
104
- for orf, data in additional_orfs.items():
105
- o_Start = int(orf.split(',')[0])
106
- o_Stop = int(orf.split(',')[1])
107
- o_Strand = data[0]
108
- additional_type = data[3]
109
- additional_info = data[4]
110
- try:
111
- ref_type = ref_genes[str(o_Start) + ',' + str(o_Stop)][2]
112
- ref_info = ref_genes[str(o_Start) + ',' + str(o_Stop)][3]
113
- except KeyError:
114
- continue
115
- #try:
116
- #if ref_genes[str(o_Start) + ',' + str(o_Stop)][2] == "CDS" : # Make sure 100% match and is also CDS
117
- if additional_type == ref_type:
118
- genes_To_Keep.update({str(o_Start) + ',' + str(o_Stop): [o_Strand, options.coverage,additional_type,ref_type,additional_info,ref_info]}) # o_ and g_ would be the same here
119
- #except KeyError:
120
- # continue
121
- else:
122
- for orf, data in additional_orfs.items(): # Currently allows ORF to be bigger than Gene
123
- o_Start = int(orf.split(',')[0])
124
- o_Stop = int(orf.split(',')[1])
125
- o_Strand = data[0]
126
- orf_Set = set(range(int(o_Start), int(o_Stop) + 1))
127
- for gene, r_data in ref_genes.items(): # Very ineffecient
128
- g_Start = int(gene.split(',')[0])
129
- g_Stop = int(gene.split(',')[1])
130
- g_Strand = r_data[0]
131
- gene_Set = set(range(int(g_Start), int(g_Stop) + 1))
132
- overlap = len(orf_Set.intersection(gene_Set))
133
- cov = 100 * float(overlap) / float(len(gene_Set))
134
-
135
- additional_type = data[3]
136
- additional_info = data[4]
137
- ref_type = r_data[2]
138
- ref_info = r_data[3]
139
-
140
- if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and cov >= options.coverage:
141
- if additional_type == ref_type:
142
- genes_To_Keep.update({str(g_Start) + ',' + str(g_Stop): [g_Strand, int(cov),additional_type,ref_type,additional_info,ref_info]})
143
- if g_Start > o_Stop:
144
- break
145
- #########################################################
146
- #### Currently, only CDSs are filtered
147
- # for gene, g_data in ref_genes.items(): # Very ineffecient
148
- # if "CDS" not in g_data[2]:
149
- # g_Start = int(gene.split(',')[0])
150
- # g_Stop = int(gene.split(',')[1])
151
- # g_Strand = g_data[0]
152
- # genes_To_Keep.update({str(g_Start) + ',' + str(g_Stop): [g_Strand, "N/A",g_data[2]]})
153
- genes_To_Keep = sortORFs(genes_To_Keep)
154
- gff_writer(genome_ID, options.genome_DNA, options.reference_annotation, options.reference_tool, ref_gene_set, options.additional_annotation, options.additional_tool, genes_To_Keep, options.output_file)
155
-
156
- def main():
157
- print("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n#####")
158
-
159
- parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': GFF-Intersector Run Parameters.')
160
- parser._action_groups.pop()
161
-
162
- required = parser.add_argument_group('Required Arguments')
163
- required.add_argument('-dna', dest='genome_DNA', required=True, help='Genome DNA file (.fa) which both annotations '
164
- 'are based on')
165
- required.add_argument('-ref', dest='reference_annotation', required=True,
166
- help='Which reference annotation file to use as reference?')
167
- required.add_argument('-at', dest='additional_tool', required=True,
168
- help='Which format to use for additional annotation?')
169
- required.add_argument('-add', dest='additional_annotation', required=True,
170
- help='Which annotation file to add to reference annotation?')
171
- required.add_argument('-o', dest='output_file', required=True,
172
- help='Output filename')
173
-
174
- optional = parser.add_argument_group('Optional Arguments')
175
- optional.add_argument('-rt', dest='reference_tool', required=False,
176
- help='Which tool format to use as reference? - If not provided, will default to the '
177
- 'standard GFF format and will only look for "CDS" features')
178
- optional.add_argument('-gi', dest='gene_ident', default='CDS', required=False,
179
- help='Identifier used for extraction of "genic" regions from reference annotation '
180
- '"CDS,rRNA,tRNA": Default for is "CDS" ')
181
- optional.add_argument('-cov', dest='coverage', default=100, type=int, required=False,
182
- help='Percentage coverage of reference annotation needed to confirm intersection'
183
- ' - Default: 100 == exact match')
184
-
185
- options = parser.parse_args()
186
- comparator(options)
187
-
188
-
189
-
190
- if __name__ == "__main__":
191
- main()
192
- print("Complete")