bacterial-annotator 0.6.1 → 0.6.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 110718bf7fe99545dd5418522eb463b51e6b483f
4
- data.tar.gz: 631d8765e3ecdcdd04497bbbccb589d6ab6688c5
3
+ metadata.gz: 713193368d710e1a3f78b0279657fab96ded5586
4
+ data.tar.gz: 82a924362e32d116b029429a00e55a45aa3f8dc7
5
5
  SHA512:
6
- metadata.gz: cd532e9139c817b75d835bab7cd3124574d311b6484289f8c642c5f823411cdca5427c6088f93f0ed01c291a782ad0100b40f8bfe15d3a75234b6deb7a21ffa6
7
- data.tar.gz: 0187bc5c3faa8e52184b00a12496e06ca6cc36b51d995a273a76f4728707e589cd011532f7060ac10146a78f8b466e28dcfd0ebf8552819d3c78a349083c32a6
6
+ metadata.gz: 9d853a09c8cfbb5e9bfc4525acf4d222ed8c15d601c7b40c0f68d21c724419f773dbe354e161940a04138a59c75c49743165f290f0f4acc1e4a7b15a7d5b9680
7
+ data.tar.gz: 3772047f7326e06f3cafd73f8776318001b3e53b446a92e144ec30725dd22fc41cb13c08a5f4cb4cc638bbc6b728f4c7af8b8375755f58b44ec42940f589cd6c
@@ -10,17 +10,18 @@
10
10
  require 'open-uri'
11
11
 
12
12
  ROOT_path = File.dirname(__FILE__)
13
- # raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
13
+ # raxml_url = https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip
14
14
  # Install RAXML on the user system
15
15
  def installRaxml
16
16
 
17
17
  begin
18
- resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
18
+ resp = open("https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip")
19
19
  open("#{ROOT_path}/master.zip", "wb") do |file|
20
20
  file.write(resp.read)
21
21
  end
22
22
  Dir.chdir("#{ROOT_path}/")
23
23
  `unzip master.zip && rm master.zip`
24
+ `mv standard-RAxML-8.2.11 standard-RAxML-master`
24
25
  Dir.chdir("#{ROOT_path}/standard-RAxML-master")
25
26
  `make -f Makefile.PTHREADS.gcc`
26
27
  `rm *.o`
@@ -11,6 +11,21 @@
11
11
  require 'bacterial-annotator'
12
12
  require 'bacterial-comparator'
13
13
 
14
+ VERSION = "0.6.1"
15
+
16
+ def print_version
17
+ version = "Bacterial Annotator - Version #{VERSION}\n\n"
18
+ version += "Packaged with : \n"
19
+ version += " -- Prodigal v2.6.2\n"
20
+ version += " -- BLAT standalone version\n"
21
+ version += " -- MAFFT v7.222\n"
22
+ version += " -- fasta36 v36.3.8d\n"
23
+ version += " -- RAxML v8.2.11\n"
24
+ version += " -- FastTree v2.1.10\n"
25
+ puts version
26
+ end
27
+
28
+
14
29
 
15
30
  # Usage message to print to CLI
16
31
  def usage
@@ -29,6 +44,8 @@ bacterial-annotator [annotate | compare] [OPTIONS]
29
44
 
30
45
  --help/-h Print this !
31
46
 
47
+ --version/-v Version of the Bacterial-Annotator
48
+
32
49
  OEM
33
50
 
34
51
  end
@@ -47,7 +64,7 @@ annotate [OPTIONS]
47
64
  --name/-n <name> Sample name
48
65
 
49
66
  // MERGEM-based Annotation (Recommended)
50
- --db MERGEM database directory
67
+ --db/-d <directory> MERGEM database directory
51
68
 
52
69
  // Reference-Based Annotation
53
70
  --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
@@ -56,8 +73,8 @@ annotate [OPTIONS]
56
73
  Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
57
74
  >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
58
75
  >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
59
- --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.7]
60
- --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.7]
76
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
77
+ --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
61
78
  .. otherwise hint for a non-functional protein
62
79
  --minlength <length> Minimum contig length for annotation [default=500]
63
80
 
@@ -74,8 +91,8 @@ def parseOptions_annotate
74
91
 
75
92
  # default options
76
93
  options[:outdir] = "BAnnotation"
77
- options[:pidentity] = 70
78
- options[:pcoverage] = 70
94
+ options[:pidentity] = 80
95
+ options[:pcoverage] = 80
79
96
  options[:minlength] = 500
80
97
  options[:meta] = 0
81
98
 
@@ -84,6 +101,10 @@ def parseOptions_annotate
84
101
  case x.downcase
85
102
  when "--input", "-i"
86
103
  options[:input] = ARGV.shift
104
+ when "--name", "-n"
105
+ options[:name] = ARGV.shift
106
+ when "--db", "-d"
107
+ options[:mergem] = ARGV.shift
87
108
  when "--refgenome", "-g"
88
109
  options[:refgenome] = ARGV.shift
89
110
  when "--outdir", "-o"
@@ -101,8 +122,12 @@ def parseOptions_annotate
101
122
  when "--externaldb"
102
123
  options[:external_db] = ARGV.shift
103
124
  when "--help", "-h"
125
+
104
126
  usage_annotate
105
127
  abort
128
+ when "--version", "-v"
129
+ print_version
130
+ abort
106
131
  end
107
132
 
108
133
  end
@@ -188,12 +213,11 @@ def parseOptions_compare
188
213
 
189
214
  end
190
215
 
191
-
192
216
  ########
193
217
  # MAIN #
194
218
  ########
195
219
 
196
- if ARGV.size > 1
220
+ if ARGV.size >= 1
197
221
 
198
222
  ROOT = File.dirname(__FILE__)
199
223
 
@@ -242,11 +266,13 @@ if ARGV.size > 1
242
266
  bcomp.mafft_aln aln_opt
243
267
  bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
244
268
 
245
- else
269
+ elsif ARGV[0] == "--version" or ARGV[0] == "-v"
246
270
 
247
- usage
271
+ print_version
248
272
  abort
249
273
 
274
+ else
275
+ usage
250
276
  end
251
277
 
252
278
 
@@ -30,6 +30,10 @@ class BacterialAnnotator
30
30
  @options[:pcoverage] = @options[:pcoverage].to_f
31
31
  @options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
32
32
 
33
+ if ! @options.has_key? :name
34
+ @options[:name] = @options[:input].gsub(/.fasta|.fa|.fna/,"")
35
+ end
36
+
33
37
  if File.exists? (@options[:outdir])
34
38
  if ! options.has_key? :force
35
39
  abort "Output directory already exist ! Choose another one or use -f to overwrite"
@@ -39,7 +39,6 @@ class SequenceSynteny
39
39
  partial = false
40
40
  if properties.length >= 2 and properties[1].include? "partial"
41
41
  partial = (properties[1].gsub("partial=","")=='01')
42
- puts "partial:" + partial.to_s
43
42
  end
44
43
  sequences[s_name][:partial] = partial
45
44
  sequences[s_name][:length] = s.seq.length
@@ -54,9 +53,10 @@ class SequenceSynteny
54
53
  # run blat on proteins
55
54
  def run_blat root, outdir
56
55
  base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
57
- system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
58
56
  if @type == "prot"
59
57
  system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
58
+ else
59
+ system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
60
60
  end
61
61
  @aln_file = "#{outdir}/#{@name}.blat8.tsv"
62
62
  # extract_hits
@@ -153,57 +153,6 @@ class SequenceSynteny
153
153
  end # end of method
154
154
 
155
155
 
156
- # Extract Hit from blast8 file and save it in hash
157
- # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
158
- def extract_hits_prodigal mode
159
-
160
- @aln_hits = {}
161
- feature = ""
162
- File.open(@aln_file,"r") do |fread|
163
- while l = fread.gets
164
- lA = l.chomp!.split("\t")
165
- key = lA[0]
166
- if mode == :refgenome
167
- hit = lA[1]
168
- feature = "cds"
169
- elsif mode == :externaldb
170
- hit = lA[1].chomp.split("|")[3]
171
- feature = "cds"
172
- end
173
- next if lA[2].to_f < @pidentity
174
- if ! @aln_hits.has_key? key
175
- @aln_hits[key] = {
176
- pId: lA[2].to_f.round(2),
177
- evalue: lA[10],
178
- score: lA[11].to_f,
179
- hits: [hit],
180
- length: [lA[3].to_i],
181
- query_location: [[lA[6].to_i,lA[7].to_i]],
182
- subject_location: [[lA[8].to_i,lA[9].to_i]],
183
- feature: feature
184
- }
185
- elsif lA[11].to_f > @aln_hits[key][:score]
186
- @aln_hits[key] = {
187
- pId: lA[2].to_f.round(2),
188
- evalue: lA[10],
189
- score: lA[11].to_f,
190
- hits: [hit],
191
- length: [lA[3].to_i],
192
- query_location: [[lA[6].to_i,lA[7].to_i]],
193
- subject_location: [[lA[8].to_i,lA[9].to_i]],
194
- feature: feature
195
- }
196
- elsif lA[11].to_f == @aln_hits[key][:score]
197
- @aln_hits[key][:hits] << hit
198
- @aln_hits[key][:length] << lA[3].to_i
199
- @aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
200
- @aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
201
- end
202
- end
203
- end
204
-
205
- end # end of method
206
-
207
156
  # Extract Hit from blast8 file and save it in hash
208
157
  # prpa PA0668.4|rRNA|23S 99.97 2891 1 0 705042 707932 1 2891 0.0e+00 5671.0
209
158
  def extract_hits_dna mode
@@ -247,7 +247,7 @@ class BacterialComparator
247
247
  puts "..Prot alignment files already exists, skipping."
248
248
  end
249
249
 
250
- # ugly hack to find out the reference genome
250
+ # FIXME ugly hack to find out the reference genome
251
251
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
252
252
 
253
253
  concat_alignments "align-genes-pep.all.fasta", ref_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-18 00:00:00.000000000 Z
11
+ date: 2017-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio