ORForise 1.6.0__py3-none-any.whl → 1.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. ORForise/Aggregate_Compare.py +2 -4
  2. ORForise/Annotation_Compare.py +4 -7
  3. ORForise/Annotation_Intersector.py +726 -0
  4. ORForise/Convert_To_GFF.py +6 -5
  5. ORForise/GFF_Adder.py +454 -179
  6. ORForise/List_Tools.py +63 -0
  7. ORForise/StORForise.py +8 -4
  8. ORForise/Tools/EasyGene/EasyGene.py +13 -1
  9. ORForise/Tools/{GLIMMER_3/GLIMMER_3.py → GLIMMER3/GLIMMER3.py} +2 -2
  10. ORForise/Tools/{GeneMark_HA/GeneMark_HA.py → GeneMarkHA/GeneMarkHA.py} +1 -1
  11. ORForise/Tools/GeneMarkHA/__init__.py +0 -0
  12. ORForise/Tools/Prodigal/Prodigal.py +13 -1
  13. ORForise/utils.py +4 -1
  14. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/METADATA +216 -229
  15. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/RECORD +29 -27
  16. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/entry_points.txt +4 -2
  17. ORForise/GFF_Intersector.py +0 -192
  18. /ORForise/{Tools → Aux}/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
  19. /ORForise/{Tools/GLIMMER_3 → Aux/StORF_Undetected/Completely_Undetected}/__init__.py +0 -0
  20. /ORForise/{Tools → Aux}/StORF_Undetected/StORF_Undetected.py +0 -0
  21. /ORForise/{Tools/GeneMark_HA → Aux/StORF_Undetected}/__init__.py +0 -0
  22. /ORForise/{Tools/StORF_Undetected/Completely_Undetected → Aux/StORF_Undetected/unvitiated_Genes}/__init__.py +0 -0
  23. /ORForise/{Tools → Aux}/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
  24. /ORForise/{Tools → Aux}/TabToGFF/TabToGFF.py +0 -0
  25. /ORForise/{Tools/StORF_Undetected → Aux/TabToGFF}/__init__.py +0 -0
  26. /ORForise/{Tools/StORF_Undetected/unvitiated_Genes → Aux}/__init__.py +0 -0
  27. /ORForise/Tools/{TabToGFF → GLIMMER3}/__init__.py +0 -0
  28. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/WHEEL +0 -0
  29. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/licenses/LICENSE +0 -0
  30. {orforise-1.6.0.dist-info → orforise-1.6.1.dist-info}/top_level.txt +0 -0
@@ -1,12 +1,22 @@
1
- ORForise/Aggregate_Compare.py,sha256=WzP34E4YqkOBXlE9obZfPf3Sp1Gwl40WPqE7PsGntqk,22977
2
- ORForise/Annotation_Compare.py,sha256=sMyfeMSp_bOJrv-yfp-5gNeiYkYrw1MD2V_6ZKHiCCE,18528
1
+ ORForise/Aggregate_Compare.py,sha256=kf9O_W3520To9yi2zUtmNmDfWZ2oIGr9rfdpbzg699o,22767
2
+ ORForise/Annotation_Compare.py,sha256=5IbaZX9OCkeP90tDYXLqV20KTsuNCHST1-eqbWjuid8,18436
3
+ ORForise/Annotation_Intersector.py,sha256=8nXMCdEifpLAowtTfD1zUibwpOaS4Y3fRkji_3umfVo,34458
3
4
  ORForise/Comparator.py,sha256=hvoLppG4tq7iBrIJMmm-ckIpRoLYEryTRQrASNWAjs0,48062
4
- ORForise/Convert_To_GFF.py,sha256=FMrERozEsRPW4BMglyIjc6NXpKIb7FFo4X78IrepNX4,5981
5
- ORForise/GFF_Adder.py,sha256=-BlF6DQWcbhyYT88M0ZkoaWA2YDDxsby-7jksfeJN1Q,14057
6
- ORForise/GFF_Intersector.py,sha256=EcDKyJr_47066kma2CguMf3uwzB2tYomPDFjmoX8IoU,9900
7
- ORForise/StORForise.py,sha256=2QU6q3wPK6iqtyKg2jEVwFTB4bSymyc-mSpk7T8yNaY,5431
5
+ ORForise/Convert_To_GFF.py,sha256=V1tg-qT6m7uKFgNNNVPK-bYjFTnSdREJd7y92wrGnvM,5889
6
+ ORForise/GFF_Adder.py,sha256=n-AF-JQv6sL46qJEY761EoG4tG5yzqr30r2Zk0v-DMg,28537
7
+ ORForise/List_Tools.py,sha256=6WxP_j6tLWpJ4-obcgM5pEL__C8eWx0rJnsMHY1jWtM,1666
8
+ ORForise/StORForise.py,sha256=jXa-QmN3tIkN5qiqJcFufIY5VuxwU1k99vppGPWIOa4,5627
8
9
  ORForise/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
9
- ORForise/utils.py,sha256=05pc8q02lzwJG06ndUE5tIVdIkYi3uieiiS919oN0Eg,15548
10
+ ORForise/utils.py,sha256=sEDOATM5v50g9sKbBf2QinaCnGel7EtnKiR-UebtTwQ,15751
11
+ ORForise/Aux/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ ORForise/Aux/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
13
+ ORForise/Aux/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
14
+ ORForise/Aux/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
15
+ ORForise/Aux/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ ORForise/Aux/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
+ ORForise/Aux/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
18
+ ORForise/Aux/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
19
+ ORForise/Aux/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
10
20
  ORForise/ORForise_Analysis/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
11
21
  ORForise/ORForise_Analysis/cds_checker.py,sha256=x838-PDd8HxZ3uhfW7wPzaJdiVwomNaYOZzMe-09f_0,2643
12
22
  ORForise/ORForise_Analysis/gene_Lenghts.py,sha256=eDmJqVjBJYkBMuLr4s4XDA-E-fv0eEITpWAPySOynow,939
@@ -21,7 +31,7 @@ ORForise/Tools/Augustus/Augustus.py,sha256=rEZ3h3eHrCfMFDorXxF5h0j4Wr5vTHG_rrQ-1
21
31
  ORForise/Tools/Augustus/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
32
  ORForise/Tools/Balrog/Balrog.py,sha256=wrxQe7Df-iYUq3IQvX8A9GzDy5qR9rt5LHkDnDUngKc,1768
23
33
  ORForise/Tools/Balrog/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
- ORForise/Tools/EasyGene/EasyGene.py,sha256=_1gGRYulpnhgB2xL7ZsnkXF4U8O9XgC0XQTZq7XubC4,1752
34
+ ORForise/Tools/EasyGene/EasyGene.py,sha256=FCvcmL6w_ytefSNvWfWXIG6h0BKObPtTcCp0iGUSdH0,2330
25
35
  ORForise/Tools/EasyGene/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
26
36
  ORForise/Tools/FGENESB/FGENESB.py,sha256=3Jxe2DzUTG77wllSJpN__c_4cdl_gcj2idLXNMkv1Cs,1871
27
37
  ORForise/Tools/FGENESB/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -29,12 +39,12 @@ ORForise/Tools/FragGeneScan/FragGeneScan.py,sha256=ofywMVF-FBM4s3FPwoWsJKQUX0T_i
29
39
  ORForise/Tools/FragGeneScan/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
30
40
  ORForise/Tools/GFF/GFF.py,sha256=Z9xPCWNXrmRVvBR9_PNlajQz8ZYFHvOdXwCskXR1XhI,3219
31
41
  ORForise/Tools/GFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
- ORForise/Tools/GLIMMER_3/GLIMMER_3.py,sha256=McFulHAHV4e3ROVmTn0JSz-r0TTqiEor0MsemsIkSjc,2124
33
- ORForise/Tools/GLIMMER_3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
42
+ ORForise/Tools/GLIMMER3/GLIMMER3.py,sha256=AP5FttPNsGBKrkvT-1EXO66P1LfHELj9rCo3lNWoqQk,2122
43
+ ORForise/Tools/GLIMMER3/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
34
44
  ORForise/Tools/GeneMark/GeneMark.py,sha256=SeovWnoLy7Ktkc37TXLjWWUmmgEMvip7j2XIe5fiYaA,5815
35
45
  ORForise/Tools/GeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
36
- ORForise/Tools/GeneMark_HA/GeneMark_HA.py,sha256=nfMEAszBEZw4zhhW3VtZt6yqJprXkydVAR1a3RrAm1k,1737
37
- ORForise/Tools/GeneMark_HA/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
46
+ ORForise/Tools/GeneMarkHA/GeneMarkHA.py,sha256=FUBYsscSR9Uc1dM5kXKWtu-HJ1s2Dmn2cBapAT0t308,1736
47
+ ORForise/Tools/GeneMarkHA/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
38
48
  ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py,sha256=HK1SWj-M_9AWngMkkWOXQf6sr__kvON8ZL_wYRTMEzk,1753
39
49
  ORForise/Tools/GeneMark_HMM/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
40
50
  ORForise/Tools/GeneMark_S/GeneMark_S.py,sha256=56FQ-u-uvZFN41Ii0tGCUuBWsZaPaxvigbuOVg_4QCw,1722
@@ -47,25 +57,17 @@ ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py,sha256=pfQgzwwBz54kVsLWH7G
47
57
  ORForise/Tools/MetaGeneAnnotator/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
48
58
  ORForise/Tools/MetaGeneMark/MetaGeneMark.py,sha256=_JMGtHvuX-qM-PSFI6EV91Jm86DWluukwGq7lFFCCSo,1848
49
59
  ORForise/Tools/MetaGeneMark/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
50
- ORForise/Tools/Prodigal/Prodigal.py,sha256=y8bnh9_A4DmTJAsKqD_1S_d9ak56ZXsnUiFhrGudNcw,1758
60
+ ORForise/Tools/Prodigal/Prodigal.py,sha256=DwcZmYI0MSrBj180Yj_S-jq3lIYbVUMrf0PQpjsUSx8,2324
51
61
  ORForise/Tools/Prodigal/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
52
62
  ORForise/Tools/Prokka/Prokka.py,sha256=-wKNDcZTbnUpqeqlc7VvXcC0KnwMZ4BduWAlH1p8ULU,1887
53
63
  ORForise/Tools/Prokka/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
64
  ORForise/Tools/StORF_Reporter/StORF_Reporter.py,sha256=areqA94r6nU3GOodnl4QzQbnkMd1XRve0SWn11XoOec,1993
55
65
  ORForise/Tools/StORF_Reporter/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
56
- ORForise/Tools/StORF_Undetected/StORF_Undetected.py,sha256=B7f9AxXD6j2ip4QtuOi7pwtfBCxkexE0XiDCJrKSX5U,1318
57
- ORForise/Tools/StORF_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
58
- ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py,sha256=PjCtqenheuofqTaWpfJa7VCiMK0s-9kMnOlGIBl9f7k,1860
59
- ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
60
- ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
61
- ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py,sha256=notWaFx7AG8BZjBhnGuSyitxa1cRK_7rygOPp9keGfM,1863
62
- ORForise/Tools/TabToGFF/TabToGFF.py,sha256=i9PnODPdIcsLBiCIntiq_3Z8_WeajB8ZE--DeLdRf24,5168
63
- ORForise/Tools/TabToGFF/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
64
66
  ORForise/Tools/TransDecoder/TransDecoder.py,sha256=YlYxxicuP8xjwNkAKbHOdfaurvOHH0whYxaiB6B2kjs,1778
65
67
  ORForise/Tools/TransDecoder/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
66
- orforise-1.6.0.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
67
- orforise-1.6.0.dist-info/METADATA,sha256=GxlEILI_VbuQQtNyTAtEkrKXYRWi83Zb6yGTTTTkmsY,72816
68
- orforise-1.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
69
- orforise-1.6.0.dist-info/entry_points.txt,sha256=FHQMpAIg2IUIQmgdI8A1QWDglVkyEasIAbUFupzL__Y,566
70
- orforise-1.6.0.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
71
- orforise-1.6.0.dist-info/RECORD,,
68
+ orforise-1.6.1.dist-info/licenses/LICENSE,sha256=eAL1bBUjSMCdvudcn9E3sbujCBCa839cqXxauONDbSU,32476
69
+ orforise-1.6.1.dist-info/METADATA,sha256=swyauHPeW5wTZ72-k4BM_xUSwwBvsGrZHUKA_VyEGjc,59554
70
+ orforise-1.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
71
+ orforise-1.6.1.dist-info/entry_points.txt,sha256=_HaBzKQFXCkxHIIgBH_XIOng92-GWJ5FC29LmNaSpR0,670
72
+ orforise-1.6.1.dist-info/top_level.txt,sha256=7kmFicUFY65FJmioc0cpZtXVz93V7KSKvZVWpGz5Hyk,9
73
+ orforise-1.6.1.dist-info/RECORD,,
@@ -1,13 +1,15 @@
1
1
  [console_scripts]
2
2
  Aggregate-Compare = ORForise.Aggregate_Compare:main
3
3
  Annotation-Compare = ORForise.Annotation_Compare:main
4
+ Annotation-Intersector = ORForise.Annotation_Intersector:main
4
5
  Convert-To-GFF = ORForise.Convert_To_GFF:main
5
6
  GFF-Adder = ORForise.GFF_Adder:main
6
- GFF-Intersector = ORForise.GFF_Intersector:main
7
+ List-Tools = ORForise.List_Tools:main
7
8
  StORForise = ORForise.StORForise:main
8
9
  aggregate-compare = ORForise.Aggregate_Compare:main
9
10
  annotation-compare = ORForise.Annotation_Compare:main
11
+ annotation-intersector = ORForise.Annotation_Intersector:main
10
12
  convert-to-gff = ORForise.Convert_To_GFF:main
11
13
  gff-adder = ORForise.GFF_Adder:main
12
- gff-intersector = ORForise.GFF_Intersector:main
14
+ list-tools = ORForise.List_Tools:main
13
15
  storforise = ORForise.StORForise:main
@@ -1,192 +0,0 @@
1
- from importlib import import_module
2
- import argparse
3
- import collections
4
- from datetime import date
5
- import sys
6
- try:
7
- from utils import *
8
- except ImportError:
9
- from .utils import *
10
-
11
- ################################
12
-
13
-
14
- def gff_writer(genome_ID, genome_DNA,reference_annotation, reference_tool, ref_gene_set, additional_annotation, additional_tool, genes_To_Keep, output_file):
15
- write_out = open(output_file, 'w')
16
- write_out.write("##gff-version\t3\n#\tGFF-Intersector\n#\tRun Date:" + str(date.today()) + '\n')
17
- write_out.write("##Genome DNA File:" + genome_DNA + '\n')
18
- write_out.write("##Original File: " + reference_annotation + "\n##Intersecting File: " + additional_annotation + '\n')
19
- for pos, data in genes_To_Keep.items():
20
- pos_ = pos.split(',')
21
- start = pos_[0]
22
- stop = pos_[-1]
23
- strand = data[0]
24
- ref = reference_annotation.split('/')[-1].split('.')[0]
25
- data[4] = data[4].replace('\n', '').replace('ID=','')
26
- data[5] = data[5].replace('\n', '').replace('ID=','')
27
- entry = (
28
- genome_ID + '\t' + ref + '\t' + data[2] + '\t' + start + '\t' + stop + '\t.\t' + strand + '\t.\tID=Original_Annotation=' + data[4] + ';Additional_Annotation=' + data[5] + ';Coverage=' + str(
29
- data[1]) + '\n')
30
- write_out.write(entry)
31
-
32
-
33
- def comparator(options): # Only works for single contig genome
34
- genome_seq = ""
35
- with open(options.genome_DNA, 'r') as genome_fasta:
36
- for line in genome_fasta:
37
- line = line.replace("\n", "")
38
- if not line.startswith('>'):
39
- genome_seq += str(line)
40
- else:
41
- genome_ID = line.split()[0].replace('>', '')
42
- ###########################################
43
- if not options.reference_tool: # IF using Ensembl for comparison
44
- ref_genes = collections.OrderedDict() # Order is important
45
- count = 0
46
- with open(options.reference_annotation, 'r') as genome_gff:
47
- for line in genome_gff:
48
- line = line.split('\t')
49
- try:
50
- if 'CDS' in options.gene_ident and len(options.gene_ident) == 1:
51
- if "CDS" in line[2] and len(line) == 9:
52
- start = int(line[3])
53
- stop = int(line[4])
54
- strand = line[6]
55
- pos = str(start) + ',' + str(stop)
56
- info = line[8]
57
- ref_genes.update({pos: [strand, 'ref', 'CDS',info]})
58
- count += 1
59
- else:
60
- gene_types = options.gene_ident.split(',')
61
- if any(gene_type in line[2] for gene_type in gene_types): # line[2] for normalrun
62
- start = int(line[3])
63
- stop = int(line[4])
64
- strand = line[6]
65
- pos = str(start) + ',' + str(stop)
66
- info = line[8]
67
- ref_genes.update(
68
- {pos: [strand, 'ref', line[2],info]}) # Report what type of gene/rRNA etc we have here
69
- count += 1
70
- except IndexError:
71
- continue
72
- else: # IF using a tool as reference
73
- try:
74
- reference_tool_ = import_module('Tools.' + options.reference_tool + '.' + options.reference_tool,
75
- package='my_current_pkg')
76
- except ModuleNotFoundError:
77
- try:
78
- reference_tool_ = import_module('ORForise.Tools.' + options.reference_tool + '.' + options.reference_tool,
79
- package='my_current_pkg')
80
- except ModuleNotFoundError:
81
- sys.exit("Tool not available")
82
- reference_tool_ = getattr(reference_tool_, options.reference_tool)
83
- ############ Reformatting tool output for ref_genes
84
- ref_genes = reference_tool_(options.reference_annotation, genome_seq)
85
- ref_gene_set = list(ref_genes.keys())
86
- ############################## Get Add'l
87
- try:
88
- additional_tool_ = import_module('Tools.' + options.additional_tool + '.' + options.additional_tool,
89
- package='my_current_pkg')
90
- except ModuleNotFoundError:
91
- try:
92
- additional_tool_ = import_module('ORForise.Tools.' + options.additional_tool + '.' + options.additional_tool,
93
- package='my_current_pkg')
94
- except ModuleNotFoundError:
95
- sys.exit("Tool not available")
96
- additional_tool_ = getattr(additional_tool_, options.additional_tool)
97
- additional_orfs = additional_tool_(options.additional_annotation,genome_seq,options.gene_ident)
98
- ##############################
99
-
100
-
101
- genes_To_Keep = collections.OrderedDict()
102
-
103
- if options.coverage == 100:
104
- for orf, data in additional_orfs.items():
105
- o_Start = int(orf.split(',')[0])
106
- o_Stop = int(orf.split(',')[1])
107
- o_Strand = data[0]
108
- additional_type = data[3]
109
- additional_info = data[4]
110
- try:
111
- ref_type = ref_genes[str(o_Start) + ',' + str(o_Stop)][2]
112
- ref_info = ref_genes[str(o_Start) + ',' + str(o_Stop)][3]
113
- except KeyError:
114
- continue
115
- #try:
116
- #if ref_genes[str(o_Start) + ',' + str(o_Stop)][2] == "CDS" : # Make sure 100% match and is also CDS
117
- if additional_type == ref_type:
118
- genes_To_Keep.update({str(o_Start) + ',' + str(o_Stop): [o_Strand, options.coverage,additional_type,ref_type,additional_info,ref_info]}) # o_ and g_ would be the same here
119
- #except KeyError:
120
- # continue
121
- else:
122
- for orf, data in additional_orfs.items(): # Currently allows ORF to be bigger than Gene
123
- o_Start = int(orf.split(',')[0])
124
- o_Stop = int(orf.split(',')[1])
125
- o_Strand = data[0]
126
- orf_Set = set(range(int(o_Start), int(o_Stop) + 1))
127
- for gene, r_data in ref_genes.items(): # Very ineffecient
128
- g_Start = int(gene.split(',')[0])
129
- g_Stop = int(gene.split(',')[1])
130
- g_Strand = r_data[0]
131
- gene_Set = set(range(int(g_Start), int(g_Stop) + 1))
132
- overlap = len(orf_Set.intersection(gene_Set))
133
- cov = 100 * float(overlap) / float(len(gene_Set))
134
-
135
- additional_type = data[3]
136
- additional_info = data[4]
137
- ref_type = r_data[2]
138
- ref_info = r_data[3]
139
-
140
- if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and cov >= options.coverage:
141
- if additional_type == ref_type:
142
- genes_To_Keep.update({str(g_Start) + ',' + str(g_Stop): [g_Strand, int(cov),additional_type,ref_type,additional_info,ref_info]})
143
- if g_Start > o_Stop:
144
- break
145
- #########################################################
146
- #### Currently, only CDSs are filtered
147
- # for gene, g_data in ref_genes.items(): # Very ineffecient
148
- # if "CDS" not in g_data[2]:
149
- # g_Start = int(gene.split(',')[0])
150
- # g_Stop = int(gene.split(',')[1])
151
- # g_Strand = g_data[0]
152
- # genes_To_Keep.update({str(g_Start) + ',' + str(g_Stop): [g_Strand, "N/A",g_data[2]]})
153
- genes_To_Keep = sortORFs(genes_To_Keep)
154
- gff_writer(genome_ID, options.genome_DNA, options.reference_annotation, options.reference_tool, ref_gene_set, options.additional_annotation, options.additional_tool, genes_To_Keep, options.output_file)
155
-
156
- def main():
157
- print("Thank you for using ORForise\nPlease report any issues to: https://github.com/NickJD/ORForise/issues\n#####")
158
-
159
- parser = argparse.ArgumentParser(description='ORForise ' + ORForise_Version + ': GFF-Intersector Run Parameters.')
160
- parser._action_groups.pop()
161
-
162
- required = parser.add_argument_group('Required Arguments')
163
- required.add_argument('-dna', dest='genome_DNA', required=True, help='Genome DNA file (.fa) which both annotations '
164
- 'are based on')
165
- required.add_argument('-ref', dest='reference_annotation', required=True,
166
- help='Which reference annotation file to use as reference?')
167
- required.add_argument('-at', dest='additional_tool', required=True,
168
- help='Which format to use for additional annotation?')
169
- required.add_argument('-add', dest='additional_annotation', required=True,
170
- help='Which annotation file to add to reference annotation?')
171
- required.add_argument('-o', dest='output_file', required=True,
172
- help='Output filename')
173
-
174
- optional = parser.add_argument_group('Optional Arguments')
175
- optional.add_argument('-rt', dest='reference_tool', required=False,
176
- help='Which tool format to use as reference? - If not provided, will default to the '
177
- 'standard GFF format and will only look for "CDS" features')
178
- optional.add_argument('-gi', dest='gene_ident', default='CDS', required=False,
179
- help='Identifier used for extraction of "genic" regions from reference annotation '
180
- '"CDS,rRNA,tRNA": Default for is "CDS" ')
181
- optional.add_argument('-cov', dest='coverage', default=100, type=int, required=False,
182
- help='Percentage coverage of reference annotation needed to confirm intersection'
183
- ' - Default: 100 == exact match')
184
-
185
- options = parser.parse_args()
186
- comparator(options)
187
-
188
-
189
-
190
- if __name__ == "__main__":
191
- main()
192
- print("Complete")
File without changes
File without changes