crb-blast 0.2 → 0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/bin/crb-blast +11 -2
  3. data/lib/crb-blast.rb +119 -28
  4. metadata +55 -22
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4271f12aa93ccaa344fe8dbc1bcdecd1281ef7a1
4
- data.tar.gz: 7222b48806f393e89c096ad50aaa92997dccbe9d
3
+ metadata.gz: fb3b123812929285b57903de865b981104ffdfe9
4
+ data.tar.gz: d66187a8a9b08b4bb282d03ed75898f1f5789af8
5
5
  SHA512:
6
- metadata.gz: f41b2d87c4113fbb3022bfd80329c7a1224ed166c0bd3402dd15d244e1b7f69cbd6f386c6839f294a0f76e0d2adc4d7630ea59bf2dc860c06f07fedf8d4e6544
7
- data.tar.gz: 2907e7327968d1edab89a3b6849e811d40e29bc8011671894708ebd656ebce1acb05e970aad0f9bfcfaddbbd2834deee9b1bb589a5e0028435c6023d1b89703d
6
+ metadata.gz: 260463b1df616c63dbd262fb5d0e205610d221c4d81fdf0216e28fdcf8fde7d7f821c2e10d80edec969d5d9b64288823e169d06f7235abbca7553f1a633e9fd2
7
+ data.tar.gz: cde2ffeb8b7efe84d6ca09f99358dc5dd7cf77f193dfc91669af7a9ba54e66712f085c9dbe26d45f7abe2e01a96f453568198b1516fedcecf2ee0f3cefcf6c70
data/bin/crb-blast CHANGED
@@ -6,11 +6,12 @@
6
6
 
7
7
  require 'trollop'
8
8
  require 'crb-blast'
9
+ require 'bindeps'
9
10
 
10
11
  opts = Trollop::options do
11
12
  banner <<-EOS
12
13
 
13
- CRB-Blast v0.1 by Chris Boursnell <cmb211@cam.ac.uk>
14
+ CRB-Blast v0.3 by Chris Boursnell <cmb211@cam.ac.uk>
14
15
 
15
16
  Conditional Reciprocal Best BLAST
16
17
 
@@ -44,14 +45,22 @@ EOS
44
45
  "output file as tsv",
45
46
  :required => true,
46
47
  :type => String
48
+
49
+ opt :split,
50
+ "split the fasta files into chunks and run multiple blast jobs and then"+
51
+ "combine them."
47
52
  end
48
53
 
49
54
  Trollop::die :query, "must exist" if !File.exist?(opts[:query])
50
55
  Trollop::die :target, "must exist" if !File.exist?(opts[:target])
51
56
 
57
+ gem_dir = Gem.loaded_specs['crb-blast'].full_gem_path
58
+ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
59
+ Bindeps.require gem_deps
60
+
52
61
  blaster = CRB_Blast.new(opts.query, opts.target)
53
62
  dbs = blaster.makedb
54
- run = blaster.run_blast(opts.evalue, opts.threads)
63
+ run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
55
64
  load = blaster.load_outputs
56
65
  recips = blaster.find_reciprocals
57
66
  secondaries = blaster.find_secondaries
data/lib/crb-blast.rb CHANGED
@@ -3,7 +3,7 @@
3
3
  require 'bio'
4
4
  require 'which'
5
5
  require 'hit'
6
- require 'crb-blast'
6
+ require 'threach'
7
7
 
8
8
  class Bio::FastaFormat
9
9
  def isNucl?
@@ -119,51 +119,142 @@ class CRB_Blast
119
119
  [@query_name, @target_name]
120
120
  end
121
121
 
122
- def run_blast(evalue, threads)
122
+ def run_blast(evalue, threads, split)
123
123
  if @databases
124
124
  @output1 = "#{@working_dir}/#{query_name}_into_#{target_name}.1.blast"
125
125
  @output2 = "#{@working_dir}/#{target_name}_into_#{query_name}.2.blast"
126
- cmd1=""
127
- cmd2=""
128
126
  if @query_is_prot
129
127
  if @target_is_prot
130
- cmd1 << "#{@blastp_path} "
131
- cmd2 << "#{@blastp_path} "
128
+ bin1 = "#{@blastp_path} "
129
+ bin2 = "#{@blastp_path} "
132
130
  else
133
- cmd1 << "#{@tblastn_path} "
134
- cmd2 << "#{@blastx_path} "
131
+ bin1 = "#{@tblastn_path} "
132
+ bin2 = "#{@blastx_path} "
135
133
  end
136
134
  else
137
135
  if @target_is_prot
138
- cmd1 << "#{@blastx_path} "
139
- cmd2 << "#{@tblastn_path} "
136
+ bin1 = "#{@blastx_path} "
137
+ bin2 = "#{@tblastn_path} "
140
138
  else
141
- cmd1 << "#{@blastn_path} "
142
- cmd2 << "#{@blastn_path} "
139
+ bin1 = "#{@blastn_path} "
140
+ bin2 = "#{@blastn_path} "
143
141
  end
144
142
  end
145
- cmd1 << " -query #{@query} -db #{@working_dir}/#{@target_name} "
146
- cmd1 << " -out #{@output1} -evalue #{evalue} "
143
+ if split and threads > 1
144
+ run_blast_with_splitting evalue, threads, bin1, bin2
145
+ else
146
+ run_blast_with_threads evalue, threads, bin1, bin2
147
+ end
148
+ return true
149
+ else
150
+ return false
151
+ end
152
+ end
153
+
154
+ def run_blast_with_threads evalue, threads, bin1, bin2
155
+ # puts "running blast with #{threads} threads"
156
+ cmd1 = "#{bin1} -query #{@query} -db #{@working_dir}/#{@target_name} "
157
+ cmd1 << " -out #{@output1} -evalue #{evalue} "
158
+ cmd1 << " -outfmt \"6 std qlen slen\" "
159
+ cmd1 << " -max_target_seqs 50 "
160
+ cmd1 << " -num_threads #{threads}"
161
+
162
+ cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
163
+ cmd2 << " -out #{@output2} -evalue #{evalue} "
164
+ cmd2 << " -outfmt \"6 std qlen slen\" "
165
+ cmd2 << " -max_target_seqs 50 "
166
+ cmd2 << " -num_threads #{threads}"
167
+ if !File.exist?("#{@output1}")
168
+ `#{cmd1}`
169
+ end
170
+
171
+ if !File.exist?("#{@output2}")
172
+ `#{cmd2}`
173
+ end
174
+ end
175
+
176
+ def run_blast_with_splitting evalue, threads, bin1, bin2
177
+ # puts "running blast by splitting input into #{threads} pieces"
178
+ blasts=[]
179
+ files = split_input(@query, threads)
180
+ files.threach(threads) do |thread|
181
+ cmd1 = "#{bin1} -query #{thread} -db #{@working_dir}/#{@target_name} "
182
+ cmd1 << " -out #{thread}.blast -evalue #{evalue} "
147
183
  cmd1 << " -outfmt \"6 std qlen slen\" "
148
184
  cmd1 << " -max_target_seqs 50 "
149
- cmd1 << " -num_threads #{threads}"
185
+ cmd1 << " -num_threads 1"
186
+ if !File.exists?("#{thread}.blast")
187
+ `#{cmd1}`
188
+ end
189
+ blasts << "#{thread}.blast"
190
+ end
191
+ cat_cmd = "cat "
192
+ cat_cmd << blasts.join(" ")
193
+ cat_cmd << " > #{@output1}"
194
+ `#{cat_cmd}`
195
+ blasts.each do |b|
196
+ File.delete(b) # delete intermediate blast output files
197
+ end
150
198
 
151
- cmd2 << " -query #{@target} -db #{@working_dir}/#{@query_name} "
152
- cmd2 << " -out #{@output2} -evalue #{evalue} "
199
+ blasts=[]
200
+ files = split_input(@target, threads)
201
+ files.threach(threads) do |thread|
202
+ cmd2 = "#{bin2} -query #{thread} -db #{@working_dir}/#{@query_name} "
203
+ cmd2 << " -out #{thread}.blast -evalue #{evalue} "
153
204
  cmd2 << " -outfmt \"6 std qlen slen\" "
154
205
  cmd2 << " -max_target_seqs 50 "
155
- cmd2 << " -num_threads #{threads}"
156
-
157
- if !File.exists?("#{@output1}")
158
- `#{cmd1}`
159
- end
160
- if !File.exists?("#{@output2}")
206
+ cmd2 << " -num_threads 1"
207
+ if !File.exists?("#{thread}.blast")
161
208
  `#{cmd2}`
162
209
  end
163
- return true
164
- else
165
- return false
210
+ blasts << "#{thread}.blast"
211
+ end
212
+ cat_cmd = "cat "
213
+ cat_cmd << blasts.join(" ")
214
+ cat_cmd << " > #{@output2}"
215
+ `#{cat_cmd}`
216
+ blasts.each do |b|
217
+ File.delete(b) # delete intermediate blast output files
218
+ end
219
+
220
+ end
221
+
222
+ def split_input filename, pieces
223
+ input = {}
224
+ name = nil
225
+ seq=""
226
+ File.open(filename).each_line do |line|
227
+ if line =~ /^>(.*)$/
228
+ if name
229
+ input[name]=seq
230
+ seq=""
231
+ end
232
+ name = $1
233
+ else
234
+ seq << line.chomp
235
+ end
236
+ end
237
+ input[name]=seq
238
+ # construct list of output file handles
239
+ outputs=[]
240
+ output_files=[]
241
+ pieces.times do |n|
242
+ outfile = "#{filename}_chunk_#{n}.fasta"
243
+ outputs[n] = File.open("#{outfile}", "w")
244
+ output_files[n] = "#{outfile}"
245
+ end
246
+ # write sequences
247
+ count=0
248
+ input.each_pair do |name, seq|
249
+ outputs[count].write(">#{name}\n")
250
+ outputs[count].write("#{seq}\n")
251
+ count += 1
252
+ count %= pieces
253
+ end
254
+ outputs.each do |out|
255
+ out.close
166
256
  end
257
+ output_files
167
258
  end
168
259
 
169
260
  def load_outputs
@@ -312,9 +403,9 @@ class CRB_Blast
312
403
  @target_results = nil
313
404
  end
314
405
 
315
- def run evalue, threads
406
+ def run evalue, threads, split
316
407
  makedb
317
- run_blast evalue, threads
408
+ run_blast evalue, threads, split
318
409
  load_outputs
319
410
  find_reciprocals
320
411
  find_secondaries
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crb-blast
3
3
  version: !ruby/object:Gem::Version
4
- version: '0.2'
4
+ version: '0.3'
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Boursnell
@@ -15,48 +15,48 @@ dependencies:
15
15
  name: trollop
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - '>='
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
20
  version: '0'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - '>='
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
27
  version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rake
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - '>='
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
34
  version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - '>='
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
41
  version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: bio
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - ~>
46
+ - - "~>"
47
47
  - !ruby/object:Gem::Version
48
48
  version: '1.4'
49
- - - '>='
49
+ - - ">="
50
50
  - !ruby/object:Gem::Version
51
51
  version: 1.4.3
52
52
  type: :runtime
53
53
  prerelease: false
54
54
  version_requirements: !ruby/object:Gem::Requirement
55
55
  requirements:
56
- - - ~>
56
+ - - "~>"
57
57
  - !ruby/object:Gem::Version
58
58
  version: '1.4'
59
- - - '>='
59
+ - - ">="
60
60
  - !ruby/object:Gem::Version
61
61
  version: 1.4.3
62
62
  - !ruby/object:Gem::Dependency
@@ -73,60 +73,94 @@ dependencies:
73
73
  - - '='
74
74
  - !ruby/object:Gem::Version
75
75
  version: 0.0.2
76
+ - !ruby/object:Gem::Dependency
77
+ name: threach
78
+ requirement: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.2.0
83
+ type: :runtime
84
+ prerelease: false
85
+ version_requirements: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.2.0
90
+ - !ruby/object:Gem::Dependency
91
+ name: bindeps
92
+ requirement: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '0.0'
97
+ - - ">="
98
+ - !ruby/object:Gem::Version
99
+ version: 0.0.7
100
+ type: :runtime
101
+ prerelease: false
102
+ version_requirements: !ruby/object:Gem::Requirement
103
+ requirements:
104
+ - - "~>"
105
+ - !ruby/object:Gem::Version
106
+ version: '0.0'
107
+ - - ">="
108
+ - !ruby/object:Gem::Version
109
+ version: 0.0.7
76
110
  - !ruby/object:Gem::Dependency
77
111
  name: turn
78
112
  requirement: !ruby/object:Gem::Requirement
79
113
  requirements:
80
- - - '>='
114
+ - - ">="
81
115
  - !ruby/object:Gem::Version
82
116
  version: '0'
83
117
  type: :development
84
118
  prerelease: false
85
119
  version_requirements: !ruby/object:Gem::Requirement
86
120
  requirements:
87
- - - '>='
121
+ - - ">="
88
122
  - !ruby/object:Gem::Version
89
123
  version: '0'
90
124
  - !ruby/object:Gem::Dependency
91
125
  name: simplecov
92
126
  requirement: !ruby/object:Gem::Requirement
93
127
  requirements:
94
- - - '>='
128
+ - - ">="
95
129
  - !ruby/object:Gem::Version
96
130
  version: '0'
97
131
  type: :development
98
132
  prerelease: false
99
133
  version_requirements: !ruby/object:Gem::Requirement
100
134
  requirements:
101
- - - '>='
135
+ - - ">="
102
136
  - !ruby/object:Gem::Version
103
137
  version: '0'
104
138
  - !ruby/object:Gem::Dependency
105
139
  name: shoulda-context
106
140
  requirement: !ruby/object:Gem::Requirement
107
141
  requirements:
108
- - - '>='
142
+ - - ">="
109
143
  - !ruby/object:Gem::Version
110
144
  version: '0'
111
145
  type: :development
112
146
  prerelease: false
113
147
  version_requirements: !ruby/object:Gem::Requirement
114
148
  requirements:
115
- - - '>='
149
+ - - ">="
116
150
  - !ruby/object:Gem::Version
117
151
  version: '0'
118
152
  - !ruby/object:Gem::Dependency
119
153
  name: coveralls
120
154
  requirement: !ruby/object:Gem::Requirement
121
155
  requirements:
122
- - - '>='
156
+ - - ">="
123
157
  - !ruby/object:Gem::Version
124
158
  version: 0.6.7
125
159
  type: :development
126
160
  prerelease: false
127
161
  version_requirements: !ruby/object:Gem::Requirement
128
162
  requirements:
129
- - - '>='
163
+ - - ">="
130
164
  - !ruby/object:Gem::Version
131
165
  version: 0.6.7
132
166
  description: See summary
@@ -136,9 +170,9 @@ executables:
136
170
  extensions: []
137
171
  extra_rdoc_files: []
138
172
  files:
173
+ - bin/crb-blast
139
174
  - lib/crb-blast.rb
140
175
  - lib/hit.rb
141
- - bin/crb-blast
142
176
  homepage: http://rubygems.org/gems/crb-blast
143
177
  licenses:
144
178
  - MIT
@@ -149,19 +183,18 @@ require_paths:
149
183
  - lib
150
184
  required_ruby_version: !ruby/object:Gem::Requirement
151
185
  requirements:
152
- - - '>='
186
+ - - ">="
153
187
  - !ruby/object:Gem::Version
154
188
  version: '0'
155
189
  required_rubygems_version: !ruby/object:Gem::Requirement
156
190
  requirements:
157
- - - '>='
191
+ - - ">="
158
192
  - !ruby/object:Gem::Version
159
193
  version: '0'
160
194
  requirements: []
161
195
  rubyforge_project:
162
- rubygems_version: 2.0.6
196
+ rubygems_version: 2.2.2
163
197
  signing_key:
164
198
  specification_version: 4
165
199
  summary: Run conditional reciprocal best blast
166
200
  test_files: []
167
- has_rdoc: