ORForise 1.4.0__tar.gz → 1.4.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74)
  1. {ORForise-1.4.0 → orforise-1.4.2}/PKG-INFO +7 -6
  2. {ORForise-1.4.0 → orforise-1.4.2}/README.md +5 -5
  3. {ORForise-1.4.0 → orforise-1.4.2}/setup.cfg +1 -1
  4. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Annotation_Compare.py +4 -8
  5. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Comparator.py +68 -48
  6. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/GFF_Adder.py +0 -2
  7. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/StORForise.py +10 -10
  8. orforise-1.4.2/src/ORForise/Tools/GFF/GFF.py +62 -0
  9. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Reporter/StORF_Reporter.py +4 -3
  10. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/utils.py +2 -13
  11. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/PKG-INFO +7 -6
  12. ORForise-1.4.0/src/ORForise/Tools/GFF/GFF.py +0 -44
  13. {ORForise-1.4.0 → orforise-1.4.2}/LICENSE +0 -0
  14. {ORForise-1.4.0 → orforise-1.4.2}/pyproject.toml +0 -0
  15. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Aggregate_Compare.py +0 -0
  16. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/GFF_Intersector.py +0 -0
  17. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/__init__.py +0 -0
  18. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/cds_checker.py +0 -0
  19. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/gene_Lenghts.py +0 -0
  20. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/genome_Metrics.py +0 -0
  21. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/hypothetical_gene_predictions.py +0 -0
  22. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/missed_Gene_Metrics.py +0 -0
  23. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/parital_Match_Analysis.py +0 -0
  24. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/result_File_Analysis.py +0 -0
  25. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/ORForise_Analysis/start_Codon_Substitution.py +0 -0
  26. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Augustus/Augustus.py +0 -0
  27. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Augustus/__init__.py +0 -0
  28. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Balrog/Balrog.py +0 -0
  29. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Balrog/__init__.py +0 -0
  30. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/EasyGene/EasyGene.py +0 -0
  31. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/EasyGene/__init__.py +0 -0
  32. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FGENESB/FGENESB.py +0 -0
  33. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FGENESB/__init__.py +0 -0
  34. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FragGeneScan/FragGeneScan.py +0 -0
  35. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/FragGeneScan/__init__.py +0 -0
  36. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GFF/__init__.py +0 -0
  37. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GLIMMER_3/GLIMMER_3.py +0 -0
  38. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GLIMMER_3/__init__.py +0 -0
  39. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark/GeneMark.py +0 -0
  40. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark/__init__.py +0 -0
  41. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HA/GeneMark_HA.py +0 -0
  42. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HA/__init__.py +0 -0
  43. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HMM/GeneMark_HMM.py +0 -0
  44. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_HMM/__init__.py +0 -0
  45. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S/GeneMark_S.py +0 -0
  46. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S/__init__.py +0 -0
  47. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S_2/GeneMark_S_2.py +0 -0
  48. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/GeneMark_S_2/__init__.py +0 -0
  49. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGene/MetaGene.py +0 -0
  50. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGene/__init__.py +0 -0
  51. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneAnnotator/MetaGeneAnnotator.py +0 -0
  52. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneAnnotator/__init__.py +0 -0
  53. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneMark/MetaGeneMark.py +0 -0
  54. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/MetaGeneMark/__init__.py +0 -0
  55. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prodigal/Prodigal.py +0 -0
  56. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prodigal/__init__.py +0 -0
  57. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prokka/Prokka.py +0 -0
  58. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/Prokka/__init__.py +0 -0
  59. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Reporter/__init__.py +0 -0
  60. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/Completey_Undetected.py +0 -0
  61. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/Completely_Undetected/__init__.py +0 -0
  62. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/StORF_Undetected.py +0 -0
  63. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/__init__.py +0 -0
  64. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/__init__.py +0 -0
  65. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/StORF_Undetected/unvitiated_Genes/unvitiated_Missed_Genes.py +0 -0
  66. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/TransDecoder/TransDecoder.py +0 -0
  67. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/TransDecoder/__init__.py +0 -0
  68. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/Tools/__init__.py +0 -0
  69. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise/__init__.py +0 -0
  70. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/SOURCES.txt +0 -0
  71. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/dependency_links.txt +0 -0
  72. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/entry_points.txt +0 -0
  73. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/requires.txt +0 -0
  74. {ORForise-1.4.0 → orforise-1.4.2}/src/ORForise.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ORForise
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
5
5
  Home-page: https://github.com/NickJD/ORForise
6
6
  Author: Nicholas Dimonaco
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.6
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
+ Requires-Dist: numpy
15
16
 
16
17
  # ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
17
18
  ## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
@@ -20,7 +21,7 @@ License-File: LICENSE
20
21
 
21
22
  # Requirements and Installation:
22
23
 
23
- ### The ORForise platform is written in Python3.8 and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
24
+ ### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
24
25
 
25
26
  ## Intallation:
26
27
 
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
61
62
  usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
62
63
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
63
64
 
64
- ORForise v1.4.0: Annotatione-Compare Run Parameters.
65
+ ORForise v1.4.2: Annotatione-Compare Run Parameters.
65
66
 
66
67
  Required Arguments:
67
68
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
111
112
  usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
112
113
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
113
114
 
114
- ORForise v1.4.0: Aggregate-Compare Run Parameters.
115
+ ORForise v1.4.2: Aggregate-Compare Run Parameters.
115
116
 
116
117
  Required Arguments:
117
118
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
265
266
  usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
266
267
  OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
267
268
 
268
- ORForise v1.4.0: GFF-Adder Run Parameters.
269
+ ORForise v1.4.2: GFF-Adder Run Parameters.
269
270
 
270
271
  Required Arguments:
271
272
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
327
328
  usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
328
329
  ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
329
330
 
330
- ORForise v1.4.0: GFF-Intersector Run Parameters.
331
+ ORForise v1.4.2: GFF-Intersector Run Parameters.
331
332
 
332
333
  Required Arguments:
333
334
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -5,7 +5,7 @@
5
5
 
6
6
  # Requirements and Installation:
7
7
 
8
- ### The ORForise platform is written in Python3.8 and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
8
+ ### The ORForise platform is written in Python (3.6-3.9) and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
9
9
 
10
10
  ## Intallation:
11
11
 
@@ -46,7 +46,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
46
46
  usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
47
47
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
48
48
 
49
- ORForise v1.4.0: Annotatione-Compare Run Parameters.
49
+ ORForise v1.4.2: Annotatione-Compare Run Parameters.
50
50
 
51
51
  Required Arguments:
52
52
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -96,7 +96,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
96
96
  usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
97
97
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
98
98
 
99
- ORForise v1.4.0: Aggregate-Compare Run Parameters.
99
+ ORForise v1.4.2: Aggregate-Compare Run Parameters.
100
100
 
101
101
  Required Arguments:
102
102
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -250,7 +250,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
250
250
  usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
251
251
  OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
252
252
 
253
- ORForise v1.4.0: GFF-Adder Run Parameters.
253
+ ORForise v1.4.2: GFF-Adder Run Parameters.
254
254
 
255
255
  Required Arguments:
256
256
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -312,7 +312,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
312
312
  usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
313
313
  ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
314
314
 
315
- ORForise v1.4.0: GFF-Intersector Run Parameters.
315
+ ORForise v1.4.2: GFF-Intersector Run Parameters.
316
316
 
317
317
  Required Arguments:
318
318
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -1,6 +1,6 @@
1
1
  [metadata]
2
2
  name = ORForise
3
- version = 1.4.0
3
+ version = 1.4.2
4
4
  author = Nicholas Dimonaco
5
5
  author_email = nicholas@dimonaco.co.uk
6
6
  description = ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
@@ -16,12 +16,8 @@ except ImportError:
16
16
  ##########################
17
17
 
18
18
  def comparator(options):
19
- genome_Seq = ""
20
- with open(options.genome_DNA, 'r') as genome:
21
- for line in genome:
22
- line = line.replace("\n", "")
23
- if not line.startswith('>'):
24
- genome_Seq += str(line)
19
+ with open(options.genome_DNA, mode='r') as genome:
20
+ genome_Seq = "".join(line.rstrip() for line in genome if not line.startswith('>'))
25
21
  ##############################################
26
22
  if not options.reference_tool: # IF using Ensembl for comparison
27
23
  ref_genes = collections.OrderedDict() # Order is important
@@ -76,11 +72,11 @@ def comparator(options):
76
72
  rep_metric_description = list(all_rep_Metrics.keys())
77
73
  rep_metrics = list(all_rep_Metrics.values())
78
74
  ############## Printing to std-out and optional csv file
79
- print('Genome Used: ' + str(options.reference_annotation.split('/')[-1]))
75
+ print('Genome Used: ' + str(options.genome_DNA.split('/')[-1]))
80
76
  if options.reference_tool:
81
77
  print('Reference Tool Used: '+str(options.reference_tool))
82
78
  else:
83
- print('Reference Used: ' + str(options.reference_annotation))
79
+ print('Reference Used: ' + str(options.reference_annotation.split('/')[-1]))
84
80
  print('Tool Compared: '+str(options.tool))
85
81
  print('Perfect Matches: ' + str(len(perfect_Matches)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(perfect_Matches)/len(ref_genes),'.2f')+'%')
86
82
  print('Partial Matches: ' + str(len(partial_Hits)) + ' [' + str(len(ref_genes))+ '] - '+ format(100 * len(partial_Hits)/len(ref_genes),'.2f')+'%')
@@ -1,5 +1,4 @@
1
1
  import numpy as np
2
-
3
2
  try:
4
3
  from utils import *
5
4
  except ImportError:
@@ -46,14 +45,32 @@ comp = comparator()
46
45
  # else:
47
46
  # print ('Key not found')
48
47
 
49
-
50
- def nuc_Count(start, stop, strand): # Gets correct seq then returns GC
51
- if strand == '-':
52
- r_Start = comp.genome_Size - stop
53
- r_Stop = comp.genome_Size - start
54
- seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
55
- elif strand == '+':
56
- seq = (comp.genome_Seq[start - 1:stop])
48
+ def is_double_range(range1, range2):
49
+ return len(range1) >= 2 * len(range2)
50
+ def nuc_Count(verbose, start, stop, strand): # Gets correct seq then returns GC
51
+ if stop >= comp.genome_Size:
52
+ if verbose == True:
53
+ print("There is a wrap around gene and I am dealing with it the best I can - Start: " + str(start) + " Stop: " + str(stop))
54
+ extra_stop = stop - comp.genome_Size
55
+ stop = comp.genome_Size
56
+ if strand == '-':
57
+ r_Start = comp.genome_Size - stop
58
+ r_Stop = comp.genome_Size - start
59
+ seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
60
+ extra_seq = (comp.genome_Seq_Rev[-extra_stop-1:])
61
+ seq = extra_seq+seq
62
+ elif strand == '+':
63
+ seq = comp.genome_Seq[start - 1:stop]
64
+ extra_seq = comp.genome_Seq[:extra_stop +1]
65
+ seq = seq+extra_seq
66
+ #seq = (comp.genome_Seq[start - 1:stop])
67
+ else:
68
+ if strand == '-':
69
+ r_Start = comp.genome_Size - stop
70
+ r_Stop = comp.genome_Size - start
71
+ seq = (comp.genome_Seq_Rev[r_Start:r_Stop + 1])
72
+ elif strand == '+':
73
+ seq = (comp.genome_Seq[start - 1:stop])
57
74
  c = 0
58
75
  a = 0
59
76
  g = 0
@@ -263,6 +280,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
263
280
  comp.genome_Seq = genome
264
281
  comp.genome_Seq_Rev = revCompIterative(genome)
265
282
  comp.genome_Size = len(genome)
283
+
284
+ better_pos_orfs_items = [[(int(pos.split(',')[0]), int(pos.split(',')[1])), orf_Details] for pos, orf_Details in orfs.items()] #TODO: turn pos into tuple instead of string everywhere
285
+
266
286
  for gene_num, gene_details in ref_genes.items(): # Loop through each gene to compare against predicted ORFs
267
287
  g_Start = int(gene_details[0])
268
288
  g_Stop = int(gene_details[1])
@@ -273,9 +293,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
273
293
  overlapping_ORFs = collections.OrderedDict()
274
294
  perfect_Match = False
275
295
  out_Frame = False
276
- for pos, orf_Details in orfs.items(): # Check if perfect match, if not check if match covers at least 75% of gene - Loop through ALL ORFs - SLOW
277
- o_Start = int(pos.split(',')[0])
278
- o_Stop = int(pos.split(',')[1])
296
+ for pos, orf_Details in better_pos_orfs_items: # Check if perfect match, if not check if match covers at least 75% of gene - Loop through ALL ORFs - SLOW
297
+ o_Start,o_Stop = pos
279
298
  o_Strand = orf_Details[0]
280
299
  #orf_Set = set(range(o_Start, o_Stop + 1)) Removed for optimisation
281
300
  if o_Stop <= g_Start or o_Start >= g_Stop: # Not caught up yet
@@ -283,15 +302,17 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
283
302
  elif o_Start == g_Start and o_Stop == g_Stop: # If perfect match, break and skip the rest of the ORFs
284
303
  perfect_Match = True
285
304
  break
305
+ elif is_double_range(range(o_Start, o_Stop), range(g_Start,g_Stop)): # If ORF is double or more than the length of the gene, we do not count as found.
306
+ continue
286
307
  elif g_Start <= o_Start < g_Stop or g_Start < o_Stop < g_Stop: # If ORF Start or Stop is between gene Start or Stop
287
308
  #overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
288
309
  overlap = max(min(o_Stop, g_Stop) - max(o_Start, g_Start), -1) + 1
289
310
  coverage = 100 * float(overlap) / float(len(gene_Set))
290
311
  orf_Details.append(coverage)
291
312
  if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE: # Only continue if ORF covers at least 75% of the gene and is in frame
292
- overlapping_ORFs.update({pos: orf_Details})
313
+ overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
293
314
  elif coverage >= MIN_COVERAGE: # Not in frame / on same strand
294
- comp.out_Of_Frame_ORFs.update({pos: orf_Details})
315
+ comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
295
316
  out_Frame = True
296
317
  elif o_Start <= g_Start and o_Stop >= g_Stop: # If ORF extends one or both ends of the gene
297
318
  #overlap = len(gene_Set.intersection(orf_Set)) # Replaced for optimisation
@@ -299,9 +320,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
299
320
  coverage = 100 * float(overlap) / float(len(gene_Set))
300
321
  orf_Details.append(coverage)
301
322
  if abs(o_Stop - g_Stop) % 3 == 0 and o_Strand == g_Strand and coverage >= MIN_COVERAGE: # Only continue if ORF covers at least 75% of the gene and is in frame
302
- overlapping_ORFs.update({pos: orf_Details})
323
+ overlapping_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
303
324
  elif coverage >= MIN_COVERAGE:
304
- comp.out_Of_Frame_ORFs.update({pos: orf_Details})
325
+ comp.out_Of_Frame_ORFs.update({f'{o_Start},{o_Stop}': orf_Details})
305
326
  out_Frame = True
306
327
  else:
307
328
  if verbose == True:
@@ -319,8 +340,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
319
340
  comp.genes_Detected.update({str(gene_details): g_pos})
320
341
  match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
321
342
  perfect_Matched_Genes(g_Start, g_Stop, g_Strand)
322
- if verbose == True:
323
- print('Perfect Match')
343
+ #if verbose == True:
344
+ # print('Perfect Match')
324
345
  elif perfect_Match == False and len(
325
346
  overlapping_ORFs) == 1: # If we do not have a perfect match but 1 ORF which has passed the filtering
326
347
  orf_Pos = list(overlapping_ORFs.keys())[0]
@@ -340,8 +361,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
340
361
  comp.matched_ORFs.update({orf_Pos: m_ORF_Details})
341
362
  comp.genes_Detected.update({str(gene_details): orf_Pos})
342
363
  match_Statistics(o_Start, o_Stop, g_Start, g_Stop, g_Strand)
343
- if verbose == True:
344
- print('Partial Match')
364
+ #if verbose == True:
365
+ # print('Partial Match')
345
366
  partial_Hit_Calc(g_Start, g_Stop, g_Strand, o_Start, o_Stop)
346
367
  elif perfect_Match == False and len(
347
368
  overlapping_ORFs) >= 1: # If we have more than 1 potential ORF match, we check to see which is the 'best' hit
@@ -370,8 +391,8 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
370
391
  genes_Unmatched(g_Start, g_Stop, g_Strand) #
371
392
  else:
372
393
  genes_Unmatched(g_Start, g_Stop, g_Strand) # No hit
373
- if verbose == True:
374
- print("No Hit")
394
+ #if verbose == True:
395
+ # print("No Hit")
375
396
  for orf_Key in comp.matched_ORFs: # Remove ORFs from out of frame if ORF was correctly matched to another Gene
376
397
  if orf_Key in comp.out_Of_Frame_ORFs:
377
398
  del comp.out_Of_Frame_ORFs[orf_Key]
@@ -391,9 +412,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
391
412
  atg_P, gtg_P, ttg_P, att_P, ctg_P, other_Start_P, other_Starts = start_Codon_Count(orfs)
392
413
  tag_P, taa_P, tga_P, other_Stop_P, other_Stops = stop_Codon_Count(orfs)
393
414
  # Count nucleotides found from ALL ORFs
394
- gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
395
- orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
396
- matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.int)
415
+ gene_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
416
+ orf_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
417
+ matched_ORF_Nuc_Array = np.zeros((comp.genome_Size), dtype=np.bool)
397
418
 
398
419
  prev_Gene_Stop = 0
399
420
  prev_Gene_Overlapped = False
@@ -401,10 +422,11 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
401
422
  g_Start = int(gene_details[0])
402
423
  g_Stop = int(gene_details[1])
403
424
  g_Strand = gene_details[2]
404
- gene_Length = (g_Stop - g_Start)
425
+ gene_Length = (g_Stop - g_Start) +1
426
+ if gene_Length == 0: print(g_Start, g_Stop, "!!!!!!!!!!!!!!!!!!!!!!!!")
405
427
  comp.gene_Lengths.append(gene_Length)
406
- gene_Nuc_Array[g_Start - 1:g_Stop] = [1] # Changing all between the two positions to 1's
407
- comp.gene_GC.append(nuc_Count(g_Start, g_Stop, g_Strand))
428
+ gene_Nuc_Array[g_Start - 1:g_Stop] = True # Changing all between the two positions to 1's
429
+ comp.gene_GC.append(nuc_Count(verbose, g_Start, g_Stop, g_Strand))
408
430
  if gene_Length <= SHORT_ORF_LENGTH: # .utils
409
431
  comp.gene_Short.append(gene_Length)
410
432
  ### Calculate overlapping Genes -
@@ -445,10 +467,10 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
445
467
  comp.pos_Strand += 1
446
468
  elif o_Strand == "-":
447
469
  comp.neg_Strand += 1
448
- orf_Length = (o_Stop - o_Start)
470
+ orf_Length = (o_Stop - o_Start) +1
449
471
  comp.orf_Lengths.append(orf_Length)
450
- orf_Nuc_Array[o_Start - 1:o_Stop] = [1] # Changing all between the two positions to 1's
451
- comp.orf_GC.append(nuc_Count(o_Start, o_Stop, o_Strand))
472
+ orf_Nuc_Array[o_Start - 1:o_Stop] = True # Changing all between the two positions to 1's
473
+ comp.orf_GC.append(nuc_Count(verbose, o_Start, o_Stop, o_Strand))
452
474
  if orf_Length <= SHORT_ORF_LENGTH: # .utils
453
475
  comp.orf_Short.append(orf_Length)
454
476
  ### Calculate overlapping ORFs -
@@ -480,9 +502,9 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
480
502
  mo_Stop = int(mo_Positions.split(',')[1])
481
503
  mo_Strand = m_ORF_Details[0]
482
504
  mo_Length = (mo_Stop - mo_Start)
483
- matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = [1] # This is the complete matched orf not the matched orf bits
505
+ matched_ORF_Nuc_Array[mo_Start - 1:mo_Stop] = True # This is the complete matched orf not the matched orf bits
484
506
 
485
- comp.m_ORF_GC.append(nuc_Count(mo_Start, mo_Stop, mo_Strand))
507
+ comp.m_ORF_GC.append(nuc_Count(verbose, mo_Start, mo_Stop, mo_Strand))
486
508
  if mo_Length <= SHORT_ORF_LENGTH: # .utils
487
509
  comp.m_ORF_Short.append(mo_Length)
488
510
  ### Calculate overlapping Matched ORFs -
@@ -506,30 +528,28 @@ def tool_comparison(ref_genes, orfs, genome, verbose):
506
528
  elif '-' in mo_Strand:
507
529
  comp.m_ORF_Neg_Olap.append(0)
508
530
  ####
509
- gene_Coverage_Genome = format(100 * np.count_nonzero(gene_Nuc_Array) / comp.genome_Size, '.2f')
510
- orf_Coverage_Genome = format(100 * np.count_nonzero(orf_Nuc_Array) / comp.genome_Size, '.2f')
511
- matched_ORF_Coverage_Genome = format(100 * np.count_nonzero(matched_ORF_Nuc_Array) / comp.genome_Size,
531
+ gene_Coverage_Genome = format(100 * np.sum(gene_Nuc_Array) / comp.genome_Size, '.2f')
532
+ orf_Coverage_Genome = format(100 * np.sum(orf_Nuc_Array) / comp.genome_Size, '.2f')
533
+ matched_ORF_Coverage_Genome = format(100 * np.sum(matched_ORF_Nuc_Array) / comp.genome_Size,
512
534
  '.2f') # This gets the nts which are in matched ORFs - Check below
513
535
  # matched_ORF_Nuc_AND_Gene = np.logical_and(matched_ORF_Nuc_Array,gene_Nuc_Array) + [0 for i in range(len(gene_Nuc_Array))] # This gets the nts which are in both matched ORFs and detected genes
514
536
  # matched_ORF_Coverage_Genome = format(100 * np.count_nonzero(matched_ORF_Nuc_AND_Gene) / comp.genome_Size,'.2f')
515
537
 
516
538
  # gene and orf nucleotide Intersection
517
- gene_ORF_Nuc_Intersection = np.count_nonzero(gene_Nuc_Array & orf_Nuc_Array)
539
+ gene_ORF_Nuc_Intersection = np.sum(gene_Nuc_Array & orf_Nuc_Array)
518
540
  # not gene but orf nucleotides
519
- not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array) + [0 for i in range(
520
- len(gene_Nuc_Array))] # End part to keep array as 1,0 not T,F
521
- not_Gene_Nuc_And_ORF_Count = np.count_nonzero(not_Gene_Nuc_Array & orf_Nuc_Array)
541
+ not_Gene_Nuc_Array = np.logical_not(gene_Nuc_Array)
542
+ not_Gene_Nuc_And_ORF_Count = np.sum(not_Gene_Nuc_Array & orf_Nuc_Array)
522
543
  # not orf nucleotides but gene
523
- not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array) + [0 for i in range(
524
- len(orf_Nuc_Array))] # End part to keep array as 1,0 not T,F
525
- not_ORF_Nuc_And_Gene_Count = np.count_nonzero(not_ORF_Nuc_Array & gene_Nuc_Array)
544
+ not_ORF_Nuc_Array = np.logical_not(orf_Nuc_Array)
545
+ not_ORF_Nuc_And_Gene_Count = np.sum(not_ORF_Nuc_Array & gene_Nuc_Array)
526
546
  # not gene or orf nucleotides
527
- not_Gene_Nuc_Not_ORF_Nuc_Count = np.count_nonzero(not_Gene_Nuc_Array & not_ORF_Nuc_Array)
547
+ not_Gene_Nuc_Not_ORF_Nuc_Count = np.sum(not_Gene_Nuc_Array & not_ORF_Nuc_Array)
528
548
  # Nucleotide 'accuracy' - Normalised by number of nucelotides annotated by a gene
529
- NT_TP = format(gene_ORF_Nuc_Intersection / np.count_nonzero(gene_Nuc_Array), '.2f')
530
- NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.count_nonzero(not_Gene_Nuc_Array), '.2f')
531
- NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.count_nonzero(gene_Nuc_Array), '.2f')
532
- NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.count_nonzero(not_Gene_Nuc_Array), '.2f')
549
+ NT_TP = format(gene_ORF_Nuc_Intersection / np.sum(gene_Nuc_Array), '.2f')
550
+ NT_FP = format(not_Gene_Nuc_And_ORF_Count / np.sum(not_Gene_Nuc_Array), '.2f')
551
+ NT_FN = format(not_ORF_Nuc_And_Gene_Count / np.sum(gene_Nuc_Array), '.2f')
552
+ NT_TN = format(not_Gene_Nuc_Not_ORF_Nuc_Count / np.sum(not_Gene_Nuc_Array), '.2f')
533
553
  NT_Precision = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_Gene_Nuc_And_ORF_Count), '.2f')
534
554
  NT_Recall = format(gene_ORF_Nuc_Intersection / (gene_ORF_Nuc_Intersection + not_ORF_Nuc_And_Gene_Count), '.2f')
535
555
  NT_False_Discovery_Rate = format(
@@ -30,8 +30,6 @@ def gff_writer(options,genome_ID, genome_DNA, reference_annotation, reference_to
30
30
 
31
31
  for pos, data in combined_ORFs.items():
32
32
  pos_ = pos.split(',')
33
- if '15040' in pos:
34
- print(2)
35
33
  start = pos_[0]
36
34
  stop = pos_[-1]
37
35
  strand = data[0]
@@ -11,7 +11,7 @@ from Comparator import tool_comparison
11
11
 
12
12
  def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare):
13
13
  genome_Seq = ""
14
- with open('Genomes/' + genome_to_compare + '.fa', 'r') as genome:
14
+ with open(genome_to_compare, 'r') as genome:
15
15
  for line in genome:
16
16
  line = line.replace("\n", "")
17
17
  if ">" not in line:
@@ -19,23 +19,23 @@ def comparator(tool, input_to_analyse, storfs_to_find_missing, genome_to_compare
19
19
  ##############################################
20
20
  genes = collections.OrderedDict()
21
21
  count = 0
22
- with open('Tools/StORF_Undetected/' + input_to_analyse, 'r') as genome_gff: # Get list of missed genes
22
+ with open(input_to_analyse, 'r') as genome_gff: # Get list of missed genes
23
23
  for line in genome_gff:
24
24
  if ">" in line:
25
25
  line = line.strip()
26
- Start = int(line.split('_')[1])
27
- Stop = int(line.split('_')[2])
28
- Strand = line.split('_')[3]
29
- Gene = str(Start) + ',' + str(Stop) + ',' + Strand
30
- genes.update({count: Gene})
26
+ start = int(line.split('_')[1])
27
+ stop = int(line.split('_')[2])
28
+ strand = line.split('_')[3]
29
+ gene_details = [start,stop,strand]
30
+ genes.update({count: gene_details})
31
31
  count += 1
32
32
  ##################################
33
33
  tool_predictions = import_module('Tools.' + tool + '.' + tool)
34
34
  tool_predictions = getattr(tool_predictions, tool)
35
35
  orfs = tool_predictions(storfs_to_find_missing, genome_Seq)
36
- all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
37
- genes, orfs, genome_Seq)
38
- outname = tool + '_' + genome_to_compare
36
+ all_Metrics, all_rep_Metrics, start_precision, stop_precision, other_starts, other_stops, perfect_Matches, missed_genes, unmatched_orfs, undetected_gene_metrics, unmatched_orf_metrics, orf_Coverage_Genome, matched_ORF_Coverage_Genome, gene_coverage_genome, multi_Matched_ORFs, partial_Hits = tool_comparison(
37
+ genes, orfs, genome_Seq,True)
38
+ outname = tool + '_' + genome_to_compare.split('/')[-1].split('.')[0]
39
39
  metric_description = list(all_Metrics.keys())
40
40
  metrics = list(all_Metrics.values())
41
41
  rep_metric_description = list(all_rep_Metrics.keys())
@@ -0,0 +1,62 @@
1
+ import collections
2
+ import sys
3
+ try:
4
+ from utils import revCompIterative
5
+ from utils import sortORFs
6
+ except ImportError:
7
+ from ORForise.utils import revCompIterative
8
+ from ORForise.utils import sortORFs
9
+
10
+
11
def GFF(*args):
    """Collect the CDS records of a GFF file as an ordered ORF dictionary.

    The signature stays ``*args`` so existing callers keep working.

    Args:
        args[0]: Path of the GFF file to read.
        args[1]: Genome sequence as a string; its length is the genome size.
        Any further positional arguments are accepted and ignored.

    Returns:
        The result of ``sortORFs`` applied to an OrderedDict keyed by
        ``"start,stop"`` whose values are
        ``[strand, startCodon, stopCodon, feature_type, attributes]``.

    Raises:
        ValueError: If the start or end column of a CDS record is not an integer.
    """
    tool_pred = args[0]
    genome = args[1]
    GFF_ORFs = collections.OrderedDict()
    genome_rev = revCompIterative(genome)
    with open(tool_pred, 'r') as gff_input:
        for line in gff_input:
            # Only whole-line comments/directives are skipped; a '#' inside the
            # attributes column must not discard an otherwise valid record.
            if line.startswith('#'):
                continue
            fields = line.split('\t')
            # Check the column count BEFORE indexing so blank or malformed
            # lines (e.g. a trailing FASTA section) are skipped, not fatal.
            if len(fields) != 9 or 'CDS' not in fields[2]:
                continue
            start = int(fields[3])
            stop = int(fields[4])
            strand = fields[6]
            info = fields[8]  # The line is not stripped, so this keeps its newline
            codons = _gff_boundary_codons(genome, genome_rev, start, stop, strand)
            if codons is None:
                # Neither '+' nor '-': no codons can be derived, and reusing the
                # previous record's codons would silently corrupt the output.
                continue
            startCodon, stopCodon = codons
            po = str(start) + ',' + str(stop)
            GFF_ORFs[po] = [strand, startCodon, stopCodon, fields[2], info]

    return sortORFs(GFF_ORFs)


def _gff_boundary_codons(genome, genome_rev, start, stop, strand):
    """Return ``(startCodon, stopCodon)`` for a 1-based inclusive feature, or None if unstranded."""
    genome_size = len(genome)
    # GFF3 encodes a feature crossing the origin of a circular sequence with
    # end = wrapped end position + sequence length, so exactly `overhang`
    # bases continue from the beginning of the genome.
    overhang = stop - genome_size
    if '-' in strand:  # Reverse complement: coordinates are mirrored
        r_start = genome_size - stop
        r_stop = genome_size - start
        if overhang > 0:
            # On the reverse strand the wrapped bases are read first.
            seq = genome_rev[genome_size - overhang:] + genome_rev[:r_stop + 1]
            return seq[:3], seq[-3:]
        return genome_rev[r_start:r_start + 3], genome_rev[r_stop - 2:r_stop + 1]
    if '+' in strand:
        if overhang > 0:
            seq = genome[start - 1:] + genome[:overhang]
            return seq[:3], seq[-3:]
        return genome[start - 1:start + 2], genome[stop - 3:stop]
    return None
@@ -14,12 +14,13 @@ def StORF_Reporter(tool_pred, genome):
14
14
  genome_rev = revCompIterative(genome)
15
15
  with open(tool_pred, 'r') as storf_input:
16
16
  for line in storf_input:
17
- if '#' not in line:
17
+ if not line.startswith('#') and not line.startswith('\n'):
18
18
  line = line.split()
19
- if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1]: # need to harmonise this.
19
+ if 'StORF_Reporter' in line[1] or 'StoRF_Reporter' in line[1] or 'StORF' in line[1] or 'StORF-Reporter' in line[1]: # need to harmonise this.
20
20
  start = int(line[3])
21
21
  stop = int(line[4])
22
22
  strand = line[6]
23
+ info = line[8]
23
24
  if '-' in strand: # Reverse Compliment starts and stops adjusted
24
25
  r_start = genome_size - stop
25
26
  r_stop = genome_size - start
@@ -29,7 +30,7 @@ def StORF_Reporter(tool_pred, genome):
29
30
  startCodon = genome[start:start + 3]
30
31
  stopCodon = genome[stop - 3:stop]
31
32
  po = str(start) + ',' + str(stop)
32
- orf = [strand, startCodon, stopCodon, line[2]] # StORF/Con-StORF or CDS??
33
+ orf = [strand, startCodon, stopCodon, 'CDS', info] # StORF/Con-StORF or CDS??
33
34
  storf_orfs.update({po: orf})
34
35
 
35
36
  storf_orfs = sortORFs(storf_orfs)
@@ -4,22 +4,11 @@ import collections
4
4
  # Constants
5
5
  SHORT_ORF_LENGTH = 300
6
6
  MIN_COVERAGE = 75
7
- ORForise_Version = 'v1.4.0'
7
+ ORForise_Version = 'v1.4.2'
8
8
 
9
9
 
10
10
  def revCompIterative(watson): # Gets Reverse Complement
11
- complements = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C', 'N': 'N',
12
- 'R': 'Y', 'Y': 'R', 'S': 'S', 'W': 'W', 'K': 'M',
13
- 'M': 'K', 'V': 'B', 'B': 'V', 'H': 'D', 'D': 'H'}
14
- watson = watson.upper()
15
- watsonrev = watson[::-1]
16
- crick = ""
17
- for nt in watsonrev:
18
- try:
19
- crick += complements[nt]
20
- except KeyError:
21
- crick += nt # Do not modify non-standard DNA
22
- return crick
11
+ return watson.upper()[::-1].translate(str.maketrans("ATCGRYKMVBHD","TAGCYRMKBVDH"))
23
12
 
24
13
 
25
14
  def sortORFs(tool_ORFs): # Will only sort by given start position
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: ORForise
3
- Version: 1.4.0
3
+ Version: 1.4.2
4
4
  Summary: ORForise - Platform for analysing and comparing Prokaryote CoDing Sequence (CDS) Gene Predictions.
5
5
  Home-page: https://github.com/NickJD/ORForise
6
6
  Author: Nicholas Dimonaco
@@ -12,6 +12,7 @@ Classifier: Operating System :: OS Independent
12
12
  Requires-Python: >=3.6
13
13
  Description-Content-Type: text/markdown
14
14
  License-File: LICENSE
15
+ Requires-Dist: numpy
15
16
 
16
17
  # ORForise - Prokaryote Genome Annotation Analysis and Comparison Platform
17
18
  ## Published in Bioinformatics : https://academic.oup.com/bioinformatics/article/38/5/1198/6454948
@@ -20,7 +21,7 @@ License-File: LICENSE
20
21
 
21
22
  # Requirements and Installation:
22
23
 
23
- ### The ORForise platform is written in Python3.8 and only requires the NumPy library (should be installed automatically by pip when installing ORForise) which is standard in most base installations of Python3.
24
+ ### The ORForise platform is written in Python (3.6-3.9) and requires only the NumPy library, which is standard in most base installations of Python3 and should be installed automatically by pip when installing ORForise.
24
25
 
25
26
  ## Installation:
26
27
 
@@ -61,7 +62,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
61
62
  usage: Annotation_Compare.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -t TOOL -tp TOOL_PREDICTION
62
63
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
63
64
 
64
- ORForise v1.4.0: Annotatione-Compare Run Parameters.
65
+ ORForise v1.4.2: Annotatione-Compare Run Parameters.
65
66
 
66
67
  Required Arguments:
67
68
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -111,7 +112,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
111
112
  usage: Aggregate_Compare.py [-h] -dna GENOME_DNA -t TOOLS -tp TOOL_PREDICTIONS -ref REFERENCE_ANNOTATION
112
113
  [-rt REFERENCE_TOOL] [-o OUTNAME] [-v {True,False}]
113
114
 
114
- ORForise v1.4.0: Aggregate-Compare Run Parameters.
115
+ ORForise v1.4.2: Aggregate-Compare Run Parameters.
115
116
 
116
117
  Required Arguments:
117
118
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -265,7 +266,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
265
266
  usage: GFF_Adder.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add ADDITIONAL_ANNOTATION -o
266
267
  OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-gene_ident GENE_IDENT] [-olap OVERLAP]
267
268
 
268
- ORForise v1.4.0: GFF-Adder Run Parameters.
269
+ ORForise v1.4.2: GFF-Adder Run Parameters.
269
270
 
270
271
  Required Arguments:
271
272
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -327,7 +328,7 @@ Please report any issues to: https://github.com/NickJD/ORForise/issues
327
328
  usage: GFF_Intersector.py [-h] -dna GENOME_DNA -ref REFERENCE_ANNOTATION -at ADDITIONAL_TOOL -add
328
329
  ADDITIONAL_ANNOTATION -o OUTPUT_FILE [-rt REFERENCE_TOOL] [-gi GENE_IDENT] [-cov COVERAGE]
329
330
 
330
- ORForise v1.4.0: GFF-Intersector Run Parameters.
331
+ ORForise v1.4.2: GFF-Intersector Run Parameters.
331
332
 
332
333
  Required Arguments:
333
334
  -dna GENOME_DNA Genome DNA file (.fa) which both annotations are based on
@@ -1,44 +0,0 @@
1
- import collections
2
- import sys
3
- try:
4
- from utils import revCompIterative
5
- from utils import sortORFs
6
- except ImportError:
7
- from ORForise.utils import revCompIterative
8
- from ORForise.utils import sortORFs
9
-
10
-
11
def GFF(*args):
    """Collect records of the requested feature types from a GFF file.

    The signature stays ``*args`` so existing callers keep working.

    Args:
        args[0]: Path of the GFF file to read.
        args[1]: Genome sequence as a string; its length is the genome size.
        args[2]: Comma-separated feature types to keep (matched exactly
            against the third GFF column).

    Returns:
        The result of ``sortORFs`` applied to an OrderedDict keyed by
        ``"start,stop"`` whose values are
        ``[strand, startCodon, stopCodon, feature_type, attributes]``.

    Raises:
        IndexError: If fewer than three positional arguments are supplied.
        ValueError: If the start or end column of a kept record is not an integer.
    """
    tool_pred = args[0]
    genome = args[1]
    types = args[2]
    gene_types = types.split(',')  # Loop-invariant, so split once up front
    GFF_ORFs = collections.OrderedDict()
    genome_size = len(genome)
    genome_rev = revCompIterative(genome)
    with open(tool_pred, 'r') as gff_input:
        for line in gff_input:
            # Only whole-line comments/directives are skipped; a '#' inside the
            # attributes column must not discard an otherwise valid record.
            if line.startswith('#'):
                continue
            fields = line.split('\t')
            # Check the column count BEFORE indexing so blank or malformed
            # lines are skipped instead of raising IndexError.
            if len(fields) != 9 or fields[2] not in gene_types:
                continue
            start = int(fields[3])
            stop = int(fields[4])
            strand = fields[6]
            info = fields[8]  # The line is not stripped, so this keeps its newline
            if '-' in strand:  # Reverse complement: coordinates are mirrored
                r_start = genome_size - stop
                r_stop = genome_size - start
                startCodon = genome_rev[r_start:r_start + 3]
                stopCodon = genome_rev[r_stop - 2:r_stop + 1]
            elif '+' in strand:
                startCodon = genome[start - 1:start + 2]
                stopCodon = genome[stop - 3:stop]
            else:
                # Neither '+' nor '-': no codons can be derived, and reusing the
                # previous record's codons would silently corrupt the output.
                continue
            po = str(start) + ',' + str(stop)
            GFF_ORFs[po] = [strand, startCodon, stopCodon, fields[2], info]

    return sortORFs(GFF_ORFs)
File without changes
File without changes