ORForise 1.4.3__py3-none-any.whl → 1.5.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30):
  1. ORForise/Aggregate_Compare.py +318 -133
  2. ORForise/Annotation_Compare.py +243 -125
  3. ORForise/Comparator.py +600 -552
  4. ORForise/ORForise_Analysis/genome_Metrics.py +51 -33
  5. ORForise/Tools/Augustus/Augustus.py +30 -23
  6. ORForise/Tools/Balrog/Balrog.py +31 -23
  7. ORForise/Tools/EasyGene/EasyGene.py +30 -22
  8. ORForise/Tools/FGENESB/FGENESB.py +32 -25
  9. ORForise/Tools/FragGeneScan/FragGeneScan.py +29 -22
  10. ORForise/Tools/GFF/GFF.py +51 -47
  11. ORForise/Tools/GLIMMER_3/GLIMMER_3.py +34 -27
  12. ORForise/Tools/GeneMark/GeneMark.py +46 -40
  13. ORForise/Tools/GeneMark_HA/GeneMark_HA.py +29 -22
  14. ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +29 -22
  15. ORForise/Tools/GeneMark_S/GeneMark_S.py +29 -22
  16. ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +29 -25
  17. ORForise/Tools/MetaGene/MetaGene.py +29 -22
  18. ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +30 -23
  19. ORForise/Tools/MetaGeneMark/MetaGeneMark.py +30 -23
  20. ORForise/Tools/Prodigal/Prodigal.py +30 -26
  21. ORForise/Tools/Prokka/Prokka.py +30 -25
  22. ORForise/Tools/StORF_Reporter/StORF_Reporter.py +33 -26
  23. ORForise/Tools/TransDecoder/TransDecoder.py +29 -22
  24. ORForise/utils.py +204 -2
  25. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/METADATA +5 -5
  26. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/RECORD +30 -30
  27. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/entry_points.txt +5 -0
  28. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/WHEEL +0 -0
  29. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/licenses/LICENSE +0 -0
  30. {orforise-1.4.3.dist-info → orforise-1.5.0.dist-info}/top_level.txt +0 -0
@@ -7,48 +7,54 @@ except ImportError:
7
7
  from ORForise.utils import revCompIterative
8
8
  from ORForise.utils import sortORFs
9
9
 
10
- def GeneMark(tool_pred, genome):
10
+ def GeneMark(*args):
11
+ tool_pred = args[0]
12
+ dna_regions = args[1]
11
13
  geneMark_ORFs = collections.OrderedDict()
12
- genome_size = len(genome)
13
- genome_rev = revCompIterative(genome)
14
- prev_Start = 0
15
- prev_Stop = 0
16
- started = False
17
- with open(tool_pred, 'r') as GeneMark_input:
18
- for line in GeneMark_input:
19
- line = line.split()
20
- if len(line) == 7:
21
- started = True
22
- if 'direct' in line[2] or 'complement' in line[
23
- 2]: # Strange Output requires strange code - We select the Longest ORF from each set
24
- start = int(line[0])
25
- stop = int(line[1])
26
- strand = line[2]
27
- if 'complement' in strand: # Reverse Compliment starts and stops adjusted
28
- if start != prev_Start:
29
- r_start = genome_size - stop
30
- r_stop = genome_size - start
31
- strand = '-'
32
- startCodon = genome_rev[r_start:r_start + 3]
33
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
- po = str(start) + ',' + str(stop)
35
- orf = [strand, startCodon, stopCodon, 'CDS']
36
- geneMark_ORFs.update({po: orf})
37
- elif 'direct' in strand:
38
- if stop != prev_Stop:
39
- startCodon = genome[start - 1:start + 2]
40
- stopCodon = genome[stop - 3:stop]
41
- strand = '+'
42
- po = str(start) + ',' + str(stop)
43
- orf = [strand, startCodon, stopCodon, 'CDS']
44
- geneMark_ORFs.update({po: orf})
45
- prev_Start = start
46
- prev_Stop = stop
47
- elif len(line) == 0 and started == True:
48
- prev_Stop = 0
49
- prev_Start = 0
14
+ for dna_region in dna_regions:
15
+ geneMark_ORFs[dna_region] = collections.OrderedDict()
16
+ for dna_region in dna_regions:
17
+ genome = dna_regions[dna_region][0]
18
+ genome_size = len(genome)
19
+ genome_rev = revCompIterative(genome)
20
+ prev_Start = 0
21
+ prev_Stop = 0
22
+ started = False
23
+ with open(tool_pred, 'r') as GeneMark_input:
24
+ for line in GeneMark_input:
25
+ line = line.split()
26
+ if len(line) == 7:
27
+ started = True
28
+ if 'direct' in line[2] or 'complement' in line[2] and dna_region in line[0]: # Strange Output requires strange code - We select the Longest ORF from each set
29
+ start = int(line[0])
30
+ stop = int(line[1])
31
+ strand = line[2]
32
+ if 'complement' in strand: # Reverse Compliment starts and stops adjusted
33
+ if start != prev_Start:
34
+ r_start = genome_size - stop
35
+ r_stop = genome_size - start
36
+ strand = '-'
37
+ startCodon = genome_rev[r_start:r_start + 3]
38
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
39
+ po = str(start) + ',' + str(stop)
40
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark']
41
+ geneMark_ORFs.update({po: orf})
42
+ elif 'direct' in strand:
43
+ if stop != prev_Stop:
44
+ startCodon = genome[start - 1:start + 2]
45
+ stopCodon = genome[stop - 3:stop]
46
+ strand = '+'
47
+ po = str(start) + ',' + str(stop)
48
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark']
49
+ geneMark_ORFs.update({po: orf})
50
+ prev_Start = start
51
+ prev_Stop = stop
52
+ elif len(line) == 0 and started == True:
53
+ prev_Stop = 0
54
+ prev_Start = 0
50
55
 
51
- geneMark_ORFs = sortORFs(geneMark_ORFs)
56
+ for group in geneMark_ORFs:
57
+ geneMark_ORFs[group] = sortORFs(geneMark_ORFs[group])
52
58
  return geneMark_ORFs
53
59
 
54
60
  ############# This section can be used to select the ORF with highest probability score.
@@ -8,28 +8,35 @@ except ImportError:
8
8
  from ORForise.utils import sortORFs
9
9
 
10
10
 
11
- def GeneMark_HA(tool_pred, genome):
11
+ def GeneMark_HA(*args):
12
+ tool_pred = args[0]
13
+ dna_regions = args[1]
12
14
  geneMark_HA_ORFs = collections.OrderedDict()
13
- genome_size = len(genome)
14
- genome_rev = revCompIterative(genome)
15
- with open(tool_pred, 'r') as GeneMark_HA_input:
16
- for line in GeneMark_HA_input:
17
- line = line.split()
18
- if len(line) >= 9 and "CDS" in line[5]:
19
- start = int(line[6])
20
- stop = int(line[7])
21
- strand = line[9]
22
- if '-' in strand: # Reverse Compliment starts and stops adjusted
23
- r_start = genome_size - stop
24
- r_stop = genome_size - start
25
- startCodon = genome_rev[r_start:r_start + 3]
26
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
27
- elif '+' in strand:
28
- startCodon = genome[start - 1:start + 2]
29
- stopCodon = genome[stop - 3:stop]
30
- po = str(start) + ',' + str(stop)
31
- orf = [strand, startCodon, stopCodon, 'CDS']
32
- geneMark_HA_ORFs.update({po: orf})
15
+ for dna_region in dna_regions:
16
+ geneMark_HA_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as GeneMark_HA_input:
22
+ for line in GeneMark_HA_input:
23
+ line = line.split()
24
+ if len(line) >= 9 and "CDS" in line[5] and dna_region in line[0]:
25
+ start = int(line[6])
26
+ stop = int(line[7])
27
+ strand = line[9]
28
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
29
+ r_start = genome_size - stop
30
+ r_stop = genome_size - start
31
+ startCodon = genome_rev[r_start:r_start + 3]
32
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
33
+ elif '+' in strand:
34
+ startCodon = genome[start - 1:start + 2]
35
+ stopCodon = genome[stop - 3:stop]
36
+ po = str(start) + ',' + str(stop)
37
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HA']
38
+ geneMark_HA_ORFs.update({po: orf})
33
39
 
34
- geneMark_HA_ORFs = sortORFs(geneMark_HA_ORFs)
40
+ for group in geneMark_HA_ORFs:
41
+ geneMark_HA_ORFs[group] = sortORFs(geneMark_HA_ORFs[group])
35
42
  return geneMark_HA_ORFs
@@ -9,28 +9,35 @@ except ImportError:
9
9
 
10
10
 
11
11
 
12
- def GeneMark_HMM(tool_pred, genome):
12
+ def GeneMark_HMM(*args):
13
+ tool_pred = args[0]
14
+ dna_regions = args[1]
13
15
  geneMark_HMM_ORFs = collections.OrderedDict()
14
- genome_size = len(genome)
15
- genome_rev = revCompIterative(genome)
16
- with open(tool_pred, 'r') as GeneMark_HMM_input:
17
- for line in GeneMark_HMM_input:
18
- line = line.split('\t')
19
- if len(line) >= 9 and "CDS" in line[2]:
20
- start = int(line[3])
21
- stop = int(line[4])
22
- strand = line[6]
23
- if '-' in strand: # Reverse Compliment starts and stops adjusted
24
- r_start = genome_size - stop
25
- r_stop = genome_size - start
26
- startCodon = genome_rev[r_start:r_start + 3]
27
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
28
- elif '+' in strand:
29
- startCodon = genome[start - 1:start + 2]
30
- stopCodon = genome[stop - 3:stop]
31
- po = str(start) + ',' + str(stop)
32
- orf = [strand, startCodon, stopCodon, 'CDS']
33
- geneMark_HMM_ORFs.update({po: orf})
16
+ for dna_region in dna_regions:
17
+ geneMark_HMM_ORFs[dna_region] = collections.OrderedDict()
18
+ for dna_region in dna_regions:
19
+ genome = dna_regions[dna_region][0]
20
+ genome_size = len(genome)
21
+ genome_rev = revCompIterative(genome)
22
+ with open(tool_pred, 'r') as GeneMark_HMM_input:
23
+ for line in GeneMark_HMM_input:
24
+ line = line.split('\t')
25
+ if len(line) >= 9 and "CDS" in line[2] and dna_region in line[0]:
26
+ start = int(line[3])
27
+ stop = int(line[4])
28
+ strand = line[6]
29
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
30
+ r_start = genome_size - stop
31
+ r_stop = genome_size - start
32
+ startCodon = genome_rev[r_start:r_start + 3]
33
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
+ elif '+' in strand:
35
+ startCodon = genome[start - 1:start + 2]
36
+ stopCodon = genome[stop - 3:stop]
37
+ po = str(start) + ',' + str(stop)
38
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_HMM']
39
+ geneMark_HMM_ORFs.update({po: orf})
34
40
 
35
- geneMark_HMM_ORFs = sortORFs(geneMark_HMM_ORFs)
41
+ for group in geneMark_HMM_ORFs:
42
+ geneMark_HMM_ORFs[group] = sortORFs(geneMark_HMM_ORFs[group])
36
43
  return geneMark_HMM_ORFs
@@ -8,28 +8,35 @@ except ImportError:
8
8
  from ORForise.utils import sortORFs
9
9
 
10
10
 
11
- def GeneMark_S(tool_pred, genome):
11
+ def GeneMark_S(*args):
12
+ tool_pred = args[0]
13
+ dna_regions = args[1]
12
14
  geneMark_S_ORFs = collections.OrderedDict()
13
- genome_size = len(genome)
14
- genome_rev = revCompIterative(genome)
15
- with open(tool_pred, 'r') as prodigal_input:
16
- for line in prodigal_input:
17
- line = line.split()
18
- if len(line) >= 9 and "CDS" in line[5]:
19
- start = int(line[6])
20
- stop = int(line[7])
21
- strand = line[9]
22
- if '-' in strand: # Reverse Compliment starts and stops adjusted
23
- r_start = genome_size - stop
24
- r_stop = genome_size - start
25
- startCodon = genome_rev[r_start:r_start + 3]
26
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
27
- elif '+' in strand:
28
- startCodon = genome[start - 1:start + 2]
29
- stopCodon = genome[stop - 3:stop]
30
- po = str(start) + ',' + str(stop)
31
- orf = [strand, startCodon, stopCodon, 'CDS']
32
- geneMark_S_ORFs.update({po: orf})
15
+ for dna_region in dna_regions:
16
+ geneMark_S_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as prodigal_input:
22
+ for line in prodigal_input:
23
+ line = line.split()
24
+ if len(line) >= 9 and "CDS" in line[5] and dna_region in line[0]:
25
+ start = int(line[6])
26
+ stop = int(line[7])
27
+ strand = line[9]
28
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
29
+ r_start = genome_size - stop
30
+ r_stop = genome_size - start
31
+ startCodon = genome_rev[r_start:r_start + 3]
32
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
33
+ elif '+' in strand:
34
+ startCodon = genome[start - 1:start + 2]
35
+ stopCodon = genome[stop - 3:stop]
36
+ po = str(start) + ',' + str(stop)
37
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S']
38
+ geneMark_S_ORFs.update({po: orf})
33
39
 
34
- geneMark_S_ORFs = sortORFs(geneMark_S_ORFs)
40
+ for group in geneMark_S_ORFs:
41
+ geneMark_S_ORFs[group] = sortORFs(geneMark_S_ORFs[group])
35
42
  return geneMark_S_ORFs
@@ -10,30 +10,34 @@ except ImportError:
10
10
 
11
11
  def GeneMark_S_2(*args):
12
12
  tool_pred = args[0]
13
- genome = args[1]
14
- types = args[2]
15
- geneMark_S_2_ORFs = collections.defaultdict(list)
16
- genome_size = len(genome)
17
- genome_rev = revCompIterative(genome)
18
- with open(tool_pred, 'r') as GeneMark_S_2_input:
19
- for line in GeneMark_S_2_input:
20
- line = line.split('\t')
21
- if len(line) >= 9 and "CDS" in line[2]:
22
- start = int(line[3])
23
- stop = int(line[4])
24
- strand = line[6]
25
- info = line[8]
26
- if '-' in strand: # Reverse Compliment starts and stops adjusted
27
- r_start = genome_size - stop
28
- r_stop = genome_size - start
29
- startCodon = genome_rev[r_start:r_start + 3]
30
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
31
- elif '+' in strand:
32
- startCodon = genome[start - 1:start + 2]
33
- stopCodon = genome[stop - 3:stop]
34
- po = str(start) + ',' + str(stop)
35
- orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2|'+info]
36
- geneMark_S_2_ORFs.update({po: orf})
13
+ dna_regions = args[1]
14
+ geneMark_S_2_ORFs = collections.defaultdict()
15
+ for dna_region in dna_regions:
16
+ geneMark_S_2_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as GeneMark_S_2_input:
22
+ for line in GeneMark_S_2_input:
23
+ line = line.split('\t')
24
+ if len(line) >= 9 and dna_region in line[0] and "CDS" in line[2]:
25
+ start = int(line[3])
26
+ stop = int(line[4])
27
+ strand = line[6]
28
+ info = line[8]
29
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
30
+ r_start = genome_size - stop
31
+ r_stop = genome_size - start
32
+ startCodon = genome_rev[r_start:r_start + 3]
33
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
+ elif '+' in strand:
35
+ startCodon = genome[start - 1:start + 2]
36
+ stopCodon = genome[stop - 3:stop]
37
+ po = str(start) + ',' + str(stop)
38
+ orf = [strand, startCodon, stopCodon, 'CDS', 'GeneMark_S_2']
39
+ geneMark_S_2_ORFs[dna_region].update({po: orf})
37
40
 
38
- geneMark_S_2_ORFs = sortORFs(geneMark_S_2_ORFs)
41
+ for group in geneMark_S_2_ORFs:
42
+ geneMark_S_2_ORFs[group] = sortORFs(geneMark_S_2_ORFs[group])
39
43
  return geneMark_S_2_ORFs
@@ -8,28 +8,35 @@ except ImportError:
8
8
  from ORForise.utils import sortORFs
9
9
 
10
10
 
11
- def MetaGene(tool_pred, genome):
11
+ def MetaGene(*args):
12
+ tool_pred = args[0]
13
+ dna_regions = args[1]
12
14
  metaGene_ORFs = collections.OrderedDict()
13
- genome_size = len(genome)
14
- genome_rev = revCompIterative(genome)
15
- with open(tool_pred, 'r') as MetaGene_input:
16
- for line in MetaGene_input:
17
- line = line.split()
18
- if len(line) >= 6 and ("-" in line or '+' in line):
19
- start = int(line[0])
20
- stop = int(line[1])
21
- strand = line[2]
22
- if '-' in strand: # Reverse Compliment starts and stops adjusted
23
- r_start = genome_size - stop
24
- r_stop = genome_size - start
25
- startCodon = genome_rev[r_start:r_start + 3]
26
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
27
- elif '+' in strand:
28
- startCodon = genome[start - 1:start + 2]
29
- stopCodon = genome[stop - 3:stop]
30
- po = str(start) + ',' + str(stop)
31
- orf = [strand, startCodon, stopCodon, 'CDS']
32
- metaGene_ORFs.update({po: orf})
15
+ for dna_region in dna_regions:
16
+ metaGene_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as MetaGene_input:
22
+ for line in MetaGene_input:
23
+ line = line.split()
24
+ if len(line) >= 6 and ("-" in line or '+' in line) and dna_region in line[0]:
25
+ start = int(line[0])
26
+ stop = int(line[1])
27
+ strand = line[2]
28
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
29
+ r_start = genome_size - stop
30
+ r_stop = genome_size - start
31
+ startCodon = genome_rev[r_start:r_start + 3]
32
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
33
+ elif '+' in strand:
34
+ startCodon = genome[start - 1:start + 2]
35
+ stopCodon = genome[stop - 3:stop]
36
+ po = str(start) + ',' + str(stop)
37
+ orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGene']
38
+ metaGene_ORFs.update({po: orf})
33
39
 
34
- metaGene_ORFs = sortORFs(metaGene_ORFs)
40
+ for group in metaGene_ORFs:
41
+ metaGene_ORFs[group] = sortORFs(metaGene_ORFs[group])
35
42
  return metaGene_ORFs
@@ -8,29 +8,36 @@ except ImportError:
8
8
  from ORForise.utils import sortORFs
9
9
 
10
10
 
11
- def MetaGeneAnnotator(tool_pred, genome):
11
+ def MetaGeneAnnotator(*args):
12
+ tool_pred = args[0]
13
+ dna_regions = args[1]
12
14
  metaGeneAnnotator_ORFs = collections.OrderedDict()
13
- genome_size = len(genome)
14
- genome_rev = revCompIterative(genome)
15
- with open(tool_pred, 'r') as MetaGeneAnnotator_input:
16
- for line in MetaGeneAnnotator_input:
17
- line = line.split()
18
- if len(line) == 11:
19
- if "gene_" in line[0]:
20
- start = int(line[1])
21
- stop = int(line[2])
22
- strand = line[3]
23
- if '-' in strand: # Reverse Compliment starts and stops adjusted
24
- r_start = genome_size - stop
25
- r_stop = genome_size - start
26
- startCodon = genome_rev[r_start:r_start + 3]
27
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
28
- elif '+' in strand:
29
- startCodon = genome[start - 1:start + 2]
30
- stopCodon = genome[stop - 3:stop]
31
- po = str(start) + ',' + str(stop)
32
- orf = [strand, startCodon, stopCodon, 'CDS']
33
- metaGeneAnnotator_ORFs.update({po: orf})
15
+ for dna_region in dna_regions:
16
+ metaGeneAnnotator_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as MetaGeneAnnotator_input:
22
+ for line in MetaGeneAnnotator_input:
23
+ line = line.split()
24
+ if len(line) == 11 and dna_region in line[0]:
25
+ if "gene_" in line[0]:
26
+ start = int(line[1])
27
+ stop = int(line[2])
28
+ strand = line[3]
29
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
30
+ r_start = genome_size - stop
31
+ r_stop = genome_size - start
32
+ startCodon = genome_rev[r_start:r_start + 3]
33
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
+ elif '+' in strand:
35
+ startCodon = genome[start - 1:start + 2]
36
+ stopCodon = genome[stop - 3:stop]
37
+ po = str(start) + ',' + str(stop)
38
+ orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneAnnotator']
39
+ metaGeneAnnotator_ORFs.update({po: orf})
34
40
 
35
- metaGeneAnnotator_ORFs = sortORFs(metaGeneAnnotator_ORFs)
41
+ for group in metaGeneAnnotator_ORFs:
42
+ metaGeneAnnotator_ORFs[group] = sortORFs(metaGeneAnnotator_ORFs[group])
36
43
  return metaGeneAnnotator_ORFs
@@ -8,29 +8,36 @@ except ImportError:
8
8
  from ORForise.utils import sortORFs
9
9
 
10
10
 
11
- def MetaGeneMark(tool_pred, genome):
11
+ def MetaGeneMark(*args):
12
+ tool_pred = args[0]
13
+ dna_regions = args[1]
12
14
  metaGeneMarkORFs = collections.OrderedDict()
13
- genome_size = len(genome)
14
- genome_rev = revCompIterative(genome)
15
- with open(tool_pred, 'r') as metaGeneMark_input:
16
- for line in metaGeneMark_input:
17
- line = line.split()
18
- if len(line) == 19:
19
- if 'GeneMark.hmm' in line[4] and "CDS" in line[5]:
20
- start = int(line[6])
21
- stop = int(line[7])
22
- strand = line[9]
23
- if '-' in strand: # Reverse Compliment starts and stops adjusted
24
- r_start = genome_size - stop
25
- r_stop = genome_size - start
26
- startCodon = genome_rev[r_start:r_start + 3]
27
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
28
- elif '+' in strand:
29
- startCodon = genome[start - 1:start + 2]
30
- stopCodon = genome[stop - 3:stop]
31
- po = str(start) + ',' + str(stop)
32
- orf = [strand, startCodon, stopCodon, 'CDS']
33
- metaGeneMarkORFs.update({po: orf})
15
+ for dna_region in dna_regions:
16
+ metaGeneMarkORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as metaGeneMark_input:
22
+ for line in metaGeneMark_input:
23
+ line = line.split()
24
+ if len(line) == 19:
25
+ if 'GeneMark.hmm' in line[4] and "CDS" in line[5] and dna_region in line[0]:
26
+ start = int(line[6])
27
+ stop = int(line[7])
28
+ strand = line[9]
29
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
30
+ r_start = genome_size - stop
31
+ r_stop = genome_size - start
32
+ startCodon = genome_rev[r_start:r_start + 3]
33
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
+ elif '+' in strand:
35
+ startCodon = genome[start - 1:start + 2]
36
+ stopCodon = genome[stop - 3:stop]
37
+ po = str(start) + ',' + str(stop)
38
+ orf = [strand, startCodon, stopCodon, 'CDS', 'MetaGeneMark']
39
+ metaGeneMarkORFs.update({po: orf})
34
40
 
35
- metaGeneMarkORFs = sortORFs(metaGeneMarkORFs)
41
+ for group in metaGeneMarkORFs:
42
+ metaGeneMarkORFs[group] = sortORFs(metaGeneMarkORFs[group])
36
43
  return metaGeneMarkORFs
@@ -10,30 +10,34 @@ except ImportError:
10
10
 
11
11
  def Prodigal(*args):
12
12
  tool_pred = args[0]
13
- genome = args[1]
14
- #types = args[2]
15
- prodigalORFs = collections.OrderedDict()
16
- genome_size = len(genome)
17
- genome_rev = revCompIterative(genome)
18
- with open(tool_pred, 'r') as prodigal_input:
19
- for line in prodigal_input:
20
- line = line.split()
21
- if "Prodigal" in line[1] and "CDS" in line[2]:
22
- start = int(line[3])
23
- stop = int(line[4])
24
- strand = line[6]
25
- info = line[8]
26
- if '-' in strand: # Reverse Compliment starts and stops adjusted
27
- r_start = genome_size - stop
28
- r_stop = genome_size - start
29
- startCodon = genome_rev[r_start:r_start + 3]
30
- stopCodon = genome_rev[r_stop - 2:r_stop + 1]
31
- elif '+' in strand:
32
- startCodon = genome[start - 1:start + 2]
33
- stopCodon = genome[stop - 3:stop]
34
- po = str(start) + ',' + str(stop)
35
- orf = [strand, startCodon, stopCodon, 'CDS', 'Prodigal|'+info]
36
- prodigalORFs.update({po: orf})
13
+ dna_regions = args[1]
14
+ prodigal_ORFs = collections.OrderedDict()
15
+ for dna_region in dna_regions:
16
+ prodigal_ORFs[dna_region] = collections.OrderedDict()
17
+ for dna_region in dna_regions:
18
+ genome = dna_regions[dna_region][0]
19
+ genome_size = len(genome)
20
+ genome_rev = revCompIterative(genome)
21
+ with open(tool_pred, 'r') as prodigal_input:
22
+ for line in prodigal_input:
23
+ line = line.split()
24
+ if "Prodigal" in line[1] and dna_region in line[0] and "CDS" in line[2]:
25
+ start = int(line[3])
26
+ stop = int(line[4])
27
+ strand = line[6]
28
+ info = line[8]
29
+ if '-' in strand: # Reverse Compliment starts and stops adjusted
30
+ r_start = genome_size - stop
31
+ r_stop = genome_size - start
32
+ startCodon = genome_rev[r_start:r_start + 3]
33
+ stopCodon = genome_rev[r_stop - 2:r_stop + 1]
34
+ elif '+' in strand:
35
+ startCodon = genome[start - 1:start + 2]
36
+ stopCodon = genome[stop - 3:stop]
37
+ po = str(start) + ',' + str(stop)
38
+ orf = [strand, startCodon, stopCodon, 'CDS', 'Prodigal']
39
+ prodigal_ORFs[dna_region].update({po: orf})
37
40
 
38
- prodigalORFs = sortORFs(prodigalORFs)
39
- return prodigalORFs
41
+ for group in prodigal_ORFs:
42
+ prodigal_ORFs[group] = sortORFs(prodigal_ORFs[group])
43
+ return prodigal_ORFs