bacterial-annotator 0.6.1 → 0.6.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 110718bf7fe99545dd5418522eb463b51e6b483f
4
- data.tar.gz: 631d8765e3ecdcdd04497bbbccb589d6ab6688c5
3
+ metadata.gz: 713193368d710e1a3f78b0279657fab96ded5586
4
+ data.tar.gz: 82a924362e32d116b029429a00e55a45aa3f8dc7
5
5
  SHA512:
6
- metadata.gz: cd532e9139c817b75d835bab7cd3124574d311b6484289f8c642c5f823411cdca5427c6088f93f0ed01c291a782ad0100b40f8bfe15d3a75234b6deb7a21ffa6
7
- data.tar.gz: 0187bc5c3faa8e52184b00a12496e06ca6cc36b51d995a273a76f4728707e589cd011532f7060ac10146a78f8b466e28dcfd0ebf8552819d3c78a349083c32a6
6
+ metadata.gz: 9d853a09c8cfbb5e9bfc4525acf4d222ed8c15d601c7b40c0f68d21c724419f773dbe354e161940a04138a59c75c49743165f290f0f4acc1e4a7b15a7d5b9680
7
+ data.tar.gz: 3772047f7326e06f3cafd73f8776318001b3e53b446a92e144ec30725dd22fc41cb13c08a5f4cb4cc638bbc6b728f4c7af8b8375755f58b44ec42940f589cd6c
@@ -10,17 +10,18 @@
10
10
  require 'open-uri'
11
11
 
12
12
  ROOT_path = File.dirname(__FILE__)
13
- # raxml_url = https://github.com/stamatak/standard-RAxML/archive/master.zip
13
+ # raxml_url = https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip
14
14
  # Install RAXML on the user system
15
15
  def installRaxml
16
16
 
17
17
  begin
18
- resp = open("https://github.com/stamatak/standard-RAxML/archive/master.zip")
18
+ resp = open("https://github.com/stamatak/standard-RAxML/archive/v8.2.11.zip")
19
19
  open("#{ROOT_path}/master.zip", "wb") do |file|
20
20
  file.write(resp.read)
21
21
  end
22
22
  Dir.chdir("#{ROOT_path}/")
23
23
  `unzip master.zip && rm master.zip`
24
+ `mv standard-RAxML-8.2.11 standard-RAxML-master`
24
25
  Dir.chdir("#{ROOT_path}/standard-RAxML-master")
25
26
  `make -f Makefile.PTHREADS.gcc`
26
27
  `rm *.o`
@@ -11,6 +11,21 @@
11
11
  require 'bacterial-annotator'
12
12
  require 'bacterial-comparator'
13
13
 
14
+ VERSION = "0.6.1"
15
+
16
+ def print_version
17
+ version = "Bacterial Annotator - Version #{VERSION}\n\n"
18
+ version += "Packaged with : \n"
19
+ version += " -- Prodigal v2.6.2\n"
20
+ version += " -- BLAT standalone version\n"
21
+ version += " -- MAFFT v7.222\n"
22
+ version += " -- fasta36 v36.3.8d\n"
23
+ version += " -- RAxML v8.2.11\n"
24
+ version += " -- FastTree v2.1.10\n"
25
+ puts version
26
+ end
27
+
28
+
14
29
 
15
30
  # Usage message to print to CLI
16
31
  def usage
@@ -29,6 +44,8 @@ bacterial-annotator [annotate | compare] [OPTIONS]
29
44
 
30
45
  --help/-h Print this !
31
46
 
47
+ --version/-v Version of the Bacterial-Annotator
48
+
32
49
  OEM
33
50
 
34
51
  end
@@ -47,7 +64,7 @@ annotate [OPTIONS]
47
64
  --name/-n <name> Sample name
48
65
 
49
66
  // MERGEM-based Annotation (Recommended)
50
- --db MERGEM database directory
67
+ --db/-d <directory> MERGEM database directory
51
68
 
52
69
  // Reference-Based Annotation
53
70
  --refgenome/-g <GBK_ID> Provide a Genbank file or a Gbk Accession ID.
@@ -56,8 +73,8 @@ annotate [OPTIONS]
56
73
  Fasta headers need to look similar to NCBI or EBI fasta headers, ex.:
57
74
  >gi|385721352|gb|AFI72857.1| NDM-1 [Escherichia coli]
58
75
  >sp|C7C422|BLAN1_KLEPN Beta-lactamase NDM-1 OS=Klebsiella pneumoniae..
59
- --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.7]
60
- --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.7]
76
+ --pidentity <% identity> Minimum percentage identity to incorporate a CDS annotation [default=0.8]
77
+ --pcoverage <% identity> Minimum percentage of coverage over protein alignment to incorporate a CDS annotation [default=0.8]
61
78
  .. otherwise hint for a non-functional protein
62
79
  --minlength <length> Minimum contig length for annotation [default=500]
63
80
 
@@ -74,8 +91,8 @@ def parseOptions_annotate
74
91
 
75
92
  # default options
76
93
  options[:outdir] = "BAnnotation"
77
- options[:pidentity] = 70
78
- options[:pcoverage] = 70
94
+ options[:pidentity] = 80
95
+ options[:pcoverage] = 80
79
96
  options[:minlength] = 500
80
97
  options[:meta] = 0
81
98
 
@@ -84,6 +101,10 @@ def parseOptions_annotate
84
101
  case x.downcase
85
102
  when "--input", "-i"
86
103
  options[:input] = ARGV.shift
104
+ when "--name", "-n"
105
+ options[:name] = ARGV.shift
106
+ when "--db", "-d"
107
+ options[:mergem] = ARGV.shift
87
108
  when "--refgenome", "-g"
88
109
  options[:refgenome] = ARGV.shift
89
110
  when "--outdir", "-o"
@@ -101,8 +122,12 @@ def parseOptions_annotate
101
122
  when "--externaldb"
102
123
  options[:external_db] = ARGV.shift
103
124
  when "--help", "-h"
125
+
104
126
  usage_annotate
105
127
  abort
128
+ when "--version", "-v"
129
+ print_version
130
+ abort
106
131
  end
107
132
 
108
133
  end
@@ -188,12 +213,11 @@ def parseOptions_compare
188
213
 
189
214
  end
190
215
 
191
-
192
216
  ########
193
217
  # MAIN #
194
218
  ########
195
219
 
196
- if ARGV.size > 1
220
+ if ARGV.size >= 1
197
221
 
198
222
  ROOT = File.dirname(__FILE__)
199
223
 
@@ -242,11 +266,13 @@ if ARGV.size > 1
242
266
  bcomp.mafft_aln aln_opt
243
267
  bcomp.raxml_tree aln_opt, options[:bootstrap] if options[:phylogeny] == 1
244
268
 
245
- else
269
+ elsif ARGV[0] == "--version" or ARGV[0] == "-v"
246
270
 
247
- usage
271
+ print_version
248
272
  abort
249
273
 
274
+ else
275
+ usage
250
276
  end
251
277
 
252
278
 
@@ -30,6 +30,10 @@ class BacterialAnnotator
30
30
  @options[:pcoverage] = @options[:pcoverage].to_f
31
31
  @options[:pcoverage] = @options[:pcoverage] / 100 if @options[:pcoverage] > 1.00
32
32
 
33
+ if ! @options.has_key? :name
34
+ @options[:name] = @options[:input].gsub(/.fasta|.fa|.fna/,"")
35
+ end
36
+
33
37
  if File.exists? (@options[:outdir])
34
38
  if ! options.has_key? :force
35
39
  abort "Output directory already exist ! Choose another one or use -f to overwrite"
@@ -39,7 +39,6 @@ class SequenceSynteny
39
39
  partial = false
40
40
  if properties.length >= 2 and properties[1].include? "partial"
41
41
  partial = (properties[1].gsub("partial=","")=='01')
42
- puts "partial:" + partial.to_s
43
42
  end
44
43
  sequences[s_name][:partial] = partial
45
44
  sequences[s_name][:length] = s.seq.length
@@ -54,9 +53,10 @@ class SequenceSynteny
54
53
  # run blat on proteins
55
54
  def run_blat root, outdir
56
55
  base_cmd = "#{root}/blat.linux -out=blast8 -minIdentity=#{@pidentity}"
57
- system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
58
56
  if @type == "prot"
59
57
  system("#{base_cmd} -prot #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
58
+ else
59
+ system("#{base_cmd} #{@subject_file} #{@query_file} #{outdir}/#{@name}.blat8.tsv")
60
60
  end
61
61
  @aln_file = "#{outdir}/#{@name}.blat8.tsv"
62
62
  # extract_hits
@@ -153,57 +153,6 @@ class SequenceSynteny
153
153
  end # end of method
154
154
 
155
155
 
156
- # Extract Hit from blast8 file and save it in hash
157
- # contig-0_1 ABJ71957.1 96.92 65 2 0 1 65 1 65 9.2e-31 131.0
158
- def extract_hits_prodigal mode
159
-
160
- @aln_hits = {}
161
- feature = ""
162
- File.open(@aln_file,"r") do |fread|
163
- while l = fread.gets
164
- lA = l.chomp!.split("\t")
165
- key = lA[0]
166
- if mode == :refgenome
167
- hit = lA[1]
168
- feature = "cds"
169
- elsif mode == :externaldb
170
- hit = lA[1].chomp.split("|")[3]
171
- feature = "cds"
172
- end
173
- next if lA[2].to_f < @pidentity
174
- if ! @aln_hits.has_key? key
175
- @aln_hits[key] = {
176
- pId: lA[2].to_f.round(2),
177
- evalue: lA[10],
178
- score: lA[11].to_f,
179
- hits: [hit],
180
- length: [lA[3].to_i],
181
- query_location: [[lA[6].to_i,lA[7].to_i]],
182
- subject_location: [[lA[8].to_i,lA[9].to_i]],
183
- feature: feature
184
- }
185
- elsif lA[11].to_f > @aln_hits[key][:score]
186
- @aln_hits[key] = {
187
- pId: lA[2].to_f.round(2),
188
- evalue: lA[10],
189
- score: lA[11].to_f,
190
- hits: [hit],
191
- length: [lA[3].to_i],
192
- query_location: [[lA[6].to_i,lA[7].to_i]],
193
- subject_location: [[lA[8].to_i,lA[9].to_i]],
194
- feature: feature
195
- }
196
- elsif lA[11].to_f == @aln_hits[key][:score]
197
- @aln_hits[key][:hits] << hit
198
- @aln_hits[key][:length] << lA[3].to_i
199
- @aln_hits[key][:query_location] << [lA[6].to_i,lA[7].to_i]
200
- @aln_hits[key][:subject_location] << [lA[8].to_i,lA[9].to_i]
201
- end
202
- end
203
- end
204
-
205
- end # end of method
206
-
207
156
  # Extract Hit from blast8 file and save it in hash
208
157
  # prpa PA0668.4|rRNA|23S 99.97 2891 1 0 705042 707932 1 2891 0.0e+00 5671.0
209
158
  def extract_hits_dna mode
@@ -247,7 +247,7 @@ class BacterialComparator
247
247
  puts "..Prot alignment files already exists, skipping."
248
248
  end
249
249
 
250
- # ugly hack to find out the reference genome
250
+ # FIXME ugly hack to find out the reference genome
251
251
  ref_id = Dir["#{ori_dir}/#{@genomes_list[0]}/*.pep"][0].split('/')[-1].gsub(".pep","")
252
252
 
253
253
  concat_alignments "align-genes-pep.all.fasta", ref_id
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bacterial-annotator
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.1
4
+ version: 0.6.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Maxime Deraspe
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2017-09-18 00:00:00.000000000 Z
11
+ date: 2017-10-03 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bio