crb-blast 0.2 → 0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (4):
  1. checksums.yaml +4 -4
  2. data/bin/crb-blast +11 -2
  3. data/lib/crb-blast.rb +119 -28
  4. metadata +55 -22
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4271f12aa93ccaa344fe8dbc1bcdecd1281ef7a1
4
- data.tar.gz: 7222b48806f393e89c096ad50aaa92997dccbe9d
3
+ metadata.gz: fb3b123812929285b57903de865b981104ffdfe9
4
+ data.tar.gz: d66187a8a9b08b4bb282d03ed75898f1f5789af8
5
5
  SHA512:
6
- metadata.gz: f41b2d87c4113fbb3022bfd80329c7a1224ed166c0bd3402dd15d244e1b7f69cbd6f386c6839f294a0f76e0d2adc4d7630ea59bf2dc860c06f07fedf8d4e6544
7
- data.tar.gz: 2907e7327968d1edab89a3b6849e811d40e29bc8011671894708ebd656ebce1acb05e970aad0f9bfcfaddbbd2834deee9b1bb589a5e0028435c6023d1b89703d
6
+ metadata.gz: 260463b1df616c63dbd262fb5d0e205610d221c4d81fdf0216e28fdcf8fde7d7f821c2e10d80edec969d5d9b64288823e169d06f7235abbca7553f1a633e9fd2
7
+ data.tar.gz: cde2ffeb8b7efe84d6ca09f99358dc5dd7cf77f193dfc91669af7a9ba54e66712f085c9dbe26d45f7abe2e01a96f453568198b1516fedcecf2ee0f3cefcf6c70
data/bin/crb-blast CHANGED
@@ -6,11 +6,12 @@
6
6
 
7
7
  require 'trollop'
8
8
  require 'crb-blast'
9
+ require 'bindeps'
9
10
 
10
11
  opts = Trollop::options do
11
12
  banner <<-EOS
12
13
 
13
- CRB-Blast v0.1 by Chris Boursnell <cmb211@cam.ac.uk>
14
+ CRB-Blast v0.3 by Chris Boursnell <cmb211@cam.ac.uk>
14
15
 
15
16
  Conditional Reciprocal Best BLAST
16
17
 
@@ -44,14 +45,22 @@ EOS
44
45
  "output file as tsv",
45
46
  :required => true,
46
47
  :type => String
48
+
49
+ opt :split,
50
+ "split the fasta files into chunks and run multiple blast jobs and then"+
51
+ "combine them."
47
52
  end
48
53
 
49
54
  Trollop::die :query, "must exist" if !File.exist?(opts[:query])
50
55
  Trollop::die :target, "must exist" if !File.exist?(opts[:target])
51
56
 
57
+ gem_dir = Gem.loaded_specs['crb-blast'].full_gem_path
58
+ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
59
+ Bindeps.require gem_deps
60
+
52
61
  blaster = CRB_Blast.new(opts.query, opts.target)
53
62
  dbs = blaster.makedb
54
- run = blaster.run_blast(opts.evalue, opts.threads)
63
+ run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
55
64
  load = blaster.load_outputs
56
65
  recips = blaster.find_reciprocals
57
66
  secondaries = blaster.find_secondaries
data/lib/crb-blast.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'bio'
4
4
  require 'which'
5
5
  require 'hit'
6
- require 'crb-blast'
6
+ require 'threach'
7
7
 
8
8
  class Bio::FastaFormat
9
9
  def isNucl?
@@ -119,51 +119,142 @@ class CRB_Blast
119
119
  [@query_name, @target_name]
120
120
  end
121
121
 
122
- def run_blast(evalue, threads)
122
+ def run_blast(evalue, threads, split)
123
123
  if @databases
124
124
  @output1 = "#{@working_dir}/#{query_name}_into_#{target_name}.1.blast"
125
125
  @output2 = "#{@working_dir}/#{target_name}_into_#{query_name}.2.blast"
126
- cmd1=""
127
- cmd2=""
128
126
  if @query_is_prot
129
127
  if @target_is_prot
130
- cmd1 << "#{@blastp_path} "
131
- cmd2 << "#{@blastp_path} "
128
+ bin1 = "#{@blastp_path} "
129
+ bin2 = "#{@blastp_path} "
132
130
  else
133
- cmd1 << "#{@tblastn_path} "
134
- cmd2 << "#{@blastx_path} "
131
+ bin1 = "#{@tblastn_path} "
132
+ bin2 = "#{@blastx_path} "
135
133
  end
136
134
  else
137
135
  if @target_is_prot
138
- cmd1 << "#{@blastx_path} "
139
- cmd2 << "#{@tblastn_path} "
136
+ bin1 = "#{@blastx_path} "
137
+ bin2 = "#{@tblastn_path} "
140
138
  else
141
- cmd1 << "#{@blastn_path} "
142
- cmd2 << "#{@blastn_path} "
139
+ bin1 = "#{@blastn_path} "
140
+ bin2 = "#{@blastn_path} "
143
141
  end
144
142
  end
145
- cmd1 << " -query #{@query} -db #{@working_dir}/#{@target_name} "
146
- cmd1 << " -out #{@output1} -evalue #{evalue} "
143
+ if split and threads > 1
144
+ run_blast_with_splitting evalue, threads, bin1, bin2
145
+ else
146
+ run_blast_with_threads evalue, threads, bin1, bin2
147
+ end
148
+ return true
149
+ else
150
+ return false
151
+ end
152
+ end
153
+
154
+ def run_blast_with_threads evalue, threads, bin1, bin2
155
+ # puts "running blast with #{threads} threads"
156
+ cmd1 = "#{bin1} -query #{@query} -db #{@working_dir}/#{@target_name} "
157
+ cmd1 << " -out #{@output1} -evalue #{evalue} "
158
+ cmd1 << " -outfmt \"6 std qlen slen\" "
159
+ cmd1 << " -max_target_seqs 50 "
160
+ cmd1 << " -num_threads #{threads}"
161
+
162
+ cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
163
+ cmd2 << " -out #{@output2} -evalue #{evalue} "
164
+ cmd2 << " -outfmt \"6 std qlen slen\" "
165
+ cmd2 << " -max_target_seqs 50 "
166
+ cmd2 << " -num_threads #{threads}"
167
+ if !File.exist?("#{@output1}")
168
+ `#{cmd1}`
169
+ end
170
+
171
+ if !File.exist?("#{@output2}")
172
+ `#{cmd2}`
173
+ end
174
+ end
175
+
176
+ def run_blast_with_splitting evalue, threads, bin1, bin2
177
+ # puts "running blast by splitting input into #{threads} pieces"
178
+ blasts=[]
179
+ files = split_input(@query, threads)
180
+ files.threach(threads) do |thread|
181
+ cmd1 = "#{bin1} -query #{thread} -db #{@working_dir}/#{@target_name} "
182
+ cmd1 << " -out #{thread}.blast -evalue #{evalue} "
147
183
  cmd1 << " -outfmt \"6 std qlen slen\" "
148
184
  cmd1 << " -max_target_seqs 50 "
149
- cmd1 << " -num_threads #{threads}"
185
+ cmd1 << " -num_threads 1"
186
+ if !File.exists?("#{thread}.blast")
187
+ `#{cmd1}`
188
+ end
189
+ blasts << "#{thread}.blast"
190
+ end
191
+ cat_cmd = "cat "
192
+ cat_cmd << blasts.join(" ")
193
+ cat_cmd << " > #{@output1}"
194
+ `#{cat_cmd}`
195
+ blasts.each do |b|
196
+ File.delete(b) # delete intermediate blast output files
197
+ end
150
198
 
151
- cmd2 << " -query #{@target} -db #{@working_dir}/#{@query_name} "
152
- cmd2 << " -out #{@output2} -evalue #{evalue} "
199
+ blasts=[]
200
+ files = split_input(@target, threads)
201
+ files.threach(threads) do |thread|
202
+ cmd2 = "#{bin2} -query #{thread} -db #{@working_dir}/#{@query_name} "
203
+ cmd2 << " -out #{thread}.blast -evalue #{evalue} "
153
204
  cmd2 << " -outfmt \"6 std qlen slen\" "
154
205
  cmd2 << " -max_target_seqs 50 "
155
- cmd2 << " -num_threads #{threads}"
156
-
157
- if !File.exists?("#{@output1}")
158
- `#{cmd1}`
159
- end
160
- if !File.exists?("#{@output2}")
206
+ cmd2 << " -num_threads 1"
207
+ if !File.exists?("#{thread}.blast")
161
208
  `#{cmd2}`
162
209
  end
163
- return true
164
- else
165
- return false
210
+ blasts << "#{thread}.blast"
211
+ end
212
+ cat_cmd = "cat "
213
+ cat_cmd << blasts.join(" ")
214
+ cat_cmd << " > #{@output2}"
215
+ `#{cat_cmd}`
216
+ blasts.each do |b|
217
+ File.delete(b) # delete intermediate blast output files
218
+ end
219
+
220
+ end
221
+
222
+ def split_input filename, pieces
223
+ input = {}
224
+ name = nil
225
+ seq=""
226
+ File.open(filename).each_line do |line|
227
+ if line =~ /^>(.*)$/
228
+ if name
229
+ input[name]=seq
230
+ seq=""
231
+ end
232
+ name = $1
233
+ else
234
+ seq << line.chomp
235
+ end
236
+ end
237
+ input[name]=seq
238
+ # construct list of output file handles
239
+ outputs=[]
240
+ output_files=[]
241
+ pieces.times do |n|
242
+ outfile = "#{filename}_chunk_#{n}.fasta"
243
+ outputs[n] = File.open("#{outfile}", "w")
244
+ output_files[n] = "#{outfile}"
245
+ end
246
+ # write sequences
247
+ count=0
248
+ input.each_pair do |name, seq|
249
+ outputs[count].write(">#{name}\n")
250
+ outputs[count].write("#{seq}\n")
251
+ count += 1
252
+ count %= pieces
253
+ end
254
+ outputs.each do |out|
255
+ out.close
166
256
  end
257
+ output_files
167
258
  end
168
259
 
169
260
  def load_outputs
@@ -312,9 +403,9 @@ class CRB_Blast
312
403
  @target_results = nil
313
404
  end
314
405
 
315
- def run evalue, threads
406
+ def run evalue, threads, split
316
407
  makedb
317
- run_blast evalue, threads
408
+ run_blast evalue, threads, split
318
409
  load_outputs
319
410
  find_reciprocals
320
411
  find_secondaries
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crb-blast
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Boursnell
@@ -15,48 +15,48 @@ dependencies:
15
15
  name: trollop
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rake
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: bio
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - ~>
46
+ - - "~>"
47
47
  - !ruby/object:Gem::Version
48
48
  version: '1.4'
49
- - - '>='
49
+ - - ">="
50
50
  - !ruby/object:Gem::Version
51
51
  version: 1.4.3
52
52
  type: :runtime
53
53
  prerelease: false
54
54
  version_requirements: !ruby/object:Gem::Requirement
55
55
  requirements:
56
- - - ~>
56
+ - - "~>"
57
57
  - !ruby/object:Gem::Version
58
58
  version: '1.4'
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: 1.4.3
62
62
  - !ruby/object:Gem::Dependency
@@ -73,60 +73,94 @@ dependencies:
73
73
  - - '='
74
74
  - !ruby/object:Gem::Version
75
75
  version: 0.0.2
76
+ - !ruby/object:Gem::Dependency
77
+ name: threach
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.2.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.2.0
90
+ - !ruby/object:Gem::Dependency
91
+ name: bindeps
92
+ requirement: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.0'
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: 0.0.7
100
+ type: :runtime
101
+ prerelease: false
102
+ version_requirements: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - "~>"
105
+ - !ruby/object:Gem::Version
106
+ version: '0.0'
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.0.7
76
110
  - !ruby/object:Gem::Dependency
77
111
  name: turn
78
112
  requirement: !ruby/object:Gem::Requirement
79
113
  requirements:
80
- - - '>='
114
+ - - ">="
81
115
  - !ruby/object:Gem::Version
82
116
  version: '0'
83
117
  type: :development
84
118
  prerelease: false
85
119
  version_requirements: !ruby/object:Gem::Requirement
86
120
  requirements:
87
- - - '>='
121
+ - - ">="
88
122
  - !ruby/object:Gem::Version
89
123
  version: '0'
90
124
  - !ruby/object:Gem::Dependency
91
125
  name: simplecov
92
126
  requirement: !ruby/object:Gem::Requirement
93
127
  requirements:
94
- - - '>='
128
+ - - ">="
95
129
  - !ruby/object:Gem::Version
96
130
  version: '0'
97
131
  type: :development
98
132
  prerelease: false
99
133
  version_requirements: !ruby/object:Gem::Requirement
100
134
  requirements:
101
- - - '>='
135
+ - - ">="
102
136
  - !ruby/object:Gem::Version
103
137
  version: '0'
104
138
  - !ruby/object:Gem::Dependency
105
139
  name: shoulda-context
106
140
  requirement: !ruby/object:Gem::Requirement
107
141
  requirements:
108
- - - '>='
142
+ - - ">="
109
143
  - !ruby/object:Gem::Version
110
144
  version: '0'
111
145
  type: :development
112
146
  prerelease: false
113
147
  version_requirements: !ruby/object:Gem::Requirement
114
148
  requirements:
115
- - - '>='
149
+ - - ">="
116
150
  - !ruby/object:Gem::Version
117
151
  version: '0'
118
152
  - !ruby/object:Gem::Dependency
119
153
  name: coveralls
120
154
  requirement: !ruby/object:Gem::Requirement
121
155
  requirements:
122
- - - '>='
156
+ - - ">="
123
157
  - !ruby/object:Gem::Version
124
158
  version: 0.6.7
125
159
  type: :development
126
160
  prerelease: false
127
161
  version_requirements: !ruby/object:Gem::Requirement
128
162
  requirements:
129
- - - '>='
163
+ - - ">="
130
164
  - !ruby/object:Gem::Version
131
165
  version: 0.6.7
132
166
  description: See summary
@@ -136,9 +170,9 @@ executables:
136
170
  extensions: []
137
171
  extra_rdoc_files: []
138
172
  files:
173
+ - bin/crb-blast
139
174
  - lib/crb-blast.rb
140
175
  - lib/hit.rb
141
- - bin/crb-blast
142
176
  homepage: http://rubygems.org/gems/crb-blast
143
177
  licenses:
144
178
  - MIT
@@ -149,19 +183,18 @@ require_paths:
149
183
  - lib
150
184
  required_ruby_version: !ruby/object:Gem::Requirement
151
185
  requirements:
152
- - - '>='
186
+ - - ">="
153
187
  - !ruby/object:Gem::Version
154
188
  version: '0'
155
189
  required_rubygems_version: !ruby/object:Gem::Requirement
156
190
  requirements:
157
- - - '>='
191
+ - - ">="
158
192
  - !ruby/object:Gem::Version
159
193
  version: '0'
160
194
  requirements: []
161
195
  rubyforge_project:
162
- rubygems_version: 2.0.6
196
+ rubygems_version: 2.2.2
163
197
  signing_key:
164
198
  specification_version: 4
165
199
  summary: Run conditional reciprocal best blast
166
200
  test_files: []
167
- has_rdoc: