crb-blast 0.2 → 0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/bin/crb-blast +11 -2
- data/lib/crb-blast.rb +119 -28
- metadata +55 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb3b123812929285b57903de865b981104ffdfe9
|
4
|
+
data.tar.gz: d66187a8a9b08b4bb282d03ed75898f1f5789af8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 260463b1df616c63dbd262fb5d0e205610d221c4d81fdf0216e28fdcf8fde7d7f821c2e10d80edec969d5d9b64288823e169d06f7235abbca7553f1a633e9fd2
|
7
|
+
data.tar.gz: cde2ffeb8b7efe84d6ca09f99358dc5dd7cf77f193dfc91669af7a9ba54e66712f085c9dbe26d45f7abe2e01a96f453568198b1516fedcecf2ee0f3cefcf6c70
|
data/bin/crb-blast
CHANGED
@@ -6,11 +6,12 @@
|
|
6
6
|
|
7
7
|
require 'trollop'
|
8
8
|
require 'crb-blast'
|
9
|
+
require 'bindeps'
|
9
10
|
|
10
11
|
opts = Trollop::options do
|
11
12
|
banner <<-EOS
|
12
13
|
|
13
|
-
CRB-Blast v0.
|
14
|
+
CRB-Blast v0.3 by Chris Boursnell <cmb211@cam.ac.uk>
|
14
15
|
|
15
16
|
Conditional Reciprocal Best BLAST
|
16
17
|
|
@@ -44,14 +45,22 @@ EOS
|
|
44
45
|
"output file as tsv",
|
45
46
|
:required => true,
|
46
47
|
:type => String
|
48
|
+
|
49
|
+
opt :split,
|
50
|
+
"split the fasta files into chunks and run multiple blast jobs and then"+
|
51
|
+
"combine them."
|
47
52
|
end
|
48
53
|
|
49
54
|
Trollop::die :query, "must exist" if !File.exist?(opts[:query])
|
50
55
|
Trollop::die :target, "must exist" if !File.exist?(opts[:target])
|
51
56
|
|
57
|
+
gem_dir = Gem.loaded_specs['crb-blast'].full_gem_path
|
58
|
+
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
59
|
+
Bindeps.require gem_deps
|
60
|
+
|
52
61
|
blaster = CRB_Blast.new(opts.query, opts.target)
|
53
62
|
dbs = blaster.makedb
|
54
|
-
run = blaster.run_blast(opts.evalue, opts.threads)
|
63
|
+
run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
|
55
64
|
load = blaster.load_outputs
|
56
65
|
recips = blaster.find_reciprocals
|
57
66
|
secondaries = blaster.find_secondaries
|
data/lib/crb-blast.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'bio'
|
4
4
|
require 'which'
|
5
5
|
require 'hit'
|
6
|
-
require '
|
6
|
+
require 'threach'
|
7
7
|
|
8
8
|
class Bio::FastaFormat
|
9
9
|
def isNucl?
|
@@ -119,51 +119,142 @@ class CRB_Blast
|
|
119
119
|
[@query_name, @target_name]
|
120
120
|
end
|
121
121
|
|
122
|
-
def run_blast(evalue, threads)
|
122
|
+
def run_blast(evalue, threads, split)
|
123
123
|
if @databases
|
124
124
|
@output1 = "#{@working_dir}/#{query_name}_into_#{target_name}.1.blast"
|
125
125
|
@output2 = "#{@working_dir}/#{target_name}_into_#{query_name}.2.blast"
|
126
|
-
cmd1=""
|
127
|
-
cmd2=""
|
128
126
|
if @query_is_prot
|
129
127
|
if @target_is_prot
|
130
|
-
|
131
|
-
|
128
|
+
bin1 = "#{@blastp_path} "
|
129
|
+
bin2 = "#{@blastp_path} "
|
132
130
|
else
|
133
|
-
|
134
|
-
|
131
|
+
bin1 = "#{@tblastn_path} "
|
132
|
+
bin2 = "#{@blastx_path} "
|
135
133
|
end
|
136
134
|
else
|
137
135
|
if @target_is_prot
|
138
|
-
|
139
|
-
|
136
|
+
bin1 = "#{@blastx_path} "
|
137
|
+
bin2 = "#{@tblastn_path} "
|
140
138
|
else
|
141
|
-
|
142
|
-
|
139
|
+
bin1 = "#{@blastn_path} "
|
140
|
+
bin2 = "#{@blastn_path} "
|
143
141
|
end
|
144
142
|
end
|
145
|
-
|
146
|
-
|
143
|
+
if split and threads > 1
|
144
|
+
run_blast_with_splitting evalue, threads, bin1, bin2
|
145
|
+
else
|
146
|
+
run_blast_with_threads evalue, threads, bin1, bin2
|
147
|
+
end
|
148
|
+
return true
|
149
|
+
else
|
150
|
+
return false
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def run_blast_with_threads evalue, threads, bin1, bin2
|
155
|
+
# puts "running blast with #{threads} threads"
|
156
|
+
cmd1 = "#{bin1} -query #{@query} -db #{@working_dir}/#{@target_name} "
|
157
|
+
cmd1 << " -out #{@output1} -evalue #{evalue} "
|
158
|
+
cmd1 << " -outfmt \"6 std qlen slen\" "
|
159
|
+
cmd1 << " -max_target_seqs 50 "
|
160
|
+
cmd1 << " -num_threads #{threads}"
|
161
|
+
|
162
|
+
cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
|
163
|
+
cmd2 << " -out #{@output2} -evalue #{evalue} "
|
164
|
+
cmd2 << " -outfmt \"6 std qlen slen\" "
|
165
|
+
cmd2 << " -max_target_seqs 50 "
|
166
|
+
cmd2 << " -num_threads #{threads}"
|
167
|
+
if !File.exist?("#{@output1}")
|
168
|
+
`#{cmd1}`
|
169
|
+
end
|
170
|
+
|
171
|
+
if !File.exist?("#{@output2}")
|
172
|
+
`#{cmd2}`
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def run_blast_with_splitting evalue, threads, bin1, bin2
|
177
|
+
# puts "running blast by splitting input into #{threads} pieces"
|
178
|
+
blasts=[]
|
179
|
+
files = split_input(@query, threads)
|
180
|
+
files.threach(threads) do |thread|
|
181
|
+
cmd1 = "#{bin1} -query #{thread} -db #{@working_dir}/#{@target_name} "
|
182
|
+
cmd1 << " -out #{thread}.blast -evalue #{evalue} "
|
147
183
|
cmd1 << " -outfmt \"6 std qlen slen\" "
|
148
184
|
cmd1 << " -max_target_seqs 50 "
|
149
|
-
cmd1 << " -num_threads
|
185
|
+
cmd1 << " -num_threads 1"
|
186
|
+
if !File.exists?("#{thread}.blast")
|
187
|
+
`#{cmd1}`
|
188
|
+
end
|
189
|
+
blasts << "#{thread}.blast"
|
190
|
+
end
|
191
|
+
cat_cmd = "cat "
|
192
|
+
cat_cmd << blasts.join(" ")
|
193
|
+
cat_cmd << " > #{@output1}"
|
194
|
+
`#{cat_cmd}`
|
195
|
+
blasts.each do |b|
|
196
|
+
File.delete(b) # delete intermediate blast output files
|
197
|
+
end
|
150
198
|
|
151
|
-
|
152
|
-
|
199
|
+
blasts=[]
|
200
|
+
files = split_input(@target, threads)
|
201
|
+
files.threach(threads) do |thread|
|
202
|
+
cmd2 = "#{bin2} -query #{thread} -db #{@working_dir}/#{@query_name} "
|
203
|
+
cmd2 << " -out #{thread}.blast -evalue #{evalue} "
|
153
204
|
cmd2 << " -outfmt \"6 std qlen slen\" "
|
154
205
|
cmd2 << " -max_target_seqs 50 "
|
155
|
-
cmd2 << " -num_threads
|
156
|
-
|
157
|
-
if !File.exists?("#{@output1}")
|
158
|
-
`#{cmd1}`
|
159
|
-
end
|
160
|
-
if !File.exists?("#{@output2}")
|
206
|
+
cmd2 << " -num_threads 1"
|
207
|
+
if !File.exists?("#{thread}.blast")
|
161
208
|
`#{cmd2}`
|
162
209
|
end
|
163
|
-
|
164
|
-
|
165
|
-
|
210
|
+
blasts << "#{thread}.blast"
|
211
|
+
end
|
212
|
+
cat_cmd = "cat "
|
213
|
+
cat_cmd << blasts.join(" ")
|
214
|
+
cat_cmd << " > #{@output2}"
|
215
|
+
`#{cat_cmd}`
|
216
|
+
blasts.each do |b|
|
217
|
+
File.delete(b) # delete intermediate blast output files
|
218
|
+
end
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
def split_input filename, pieces
|
223
|
+
input = {}
|
224
|
+
name = nil
|
225
|
+
seq=""
|
226
|
+
File.open(filename).each_line do |line|
|
227
|
+
if line =~ /^>(.*)$/
|
228
|
+
if name
|
229
|
+
input[name]=seq
|
230
|
+
seq=""
|
231
|
+
end
|
232
|
+
name = $1
|
233
|
+
else
|
234
|
+
seq << line.chomp
|
235
|
+
end
|
236
|
+
end
|
237
|
+
input[name]=seq
|
238
|
+
# construct list of output file handles
|
239
|
+
outputs=[]
|
240
|
+
output_files=[]
|
241
|
+
pieces.times do |n|
|
242
|
+
outfile = "#{filename}_chunk_#{n}.fasta"
|
243
|
+
outputs[n] = File.open("#{outfile}", "w")
|
244
|
+
output_files[n] = "#{outfile}"
|
245
|
+
end
|
246
|
+
# write sequences
|
247
|
+
count=0
|
248
|
+
input.each_pair do |name, seq|
|
249
|
+
outputs[count].write(">#{name}\n")
|
250
|
+
outputs[count].write("#{seq}\n")
|
251
|
+
count += 1
|
252
|
+
count %= pieces
|
253
|
+
end
|
254
|
+
outputs.each do |out|
|
255
|
+
out.close
|
166
256
|
end
|
257
|
+
output_files
|
167
258
|
end
|
168
259
|
|
169
260
|
def load_outputs
|
@@ -312,9 +403,9 @@ class CRB_Blast
|
|
312
403
|
@target_results = nil
|
313
404
|
end
|
314
405
|
|
315
|
-
def run evalue, threads
|
406
|
+
def run evalue, threads, split
|
316
407
|
makedb
|
317
|
-
run_blast evalue, threads
|
408
|
+
run_blast evalue, threads, split
|
318
409
|
load_outputs
|
319
410
|
find_reciprocals
|
320
411
|
find_secondaries
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crb-blast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Boursnell
|
@@ -15,48 +15,48 @@ dependencies:
|
|
15
15
|
name: trollop
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rake
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: bio
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- - ~>
|
46
|
+
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '1.4'
|
49
|
-
- -
|
49
|
+
- - ">="
|
50
50
|
- !ruby/object:Gem::Version
|
51
51
|
version: 1.4.3
|
52
52
|
type: :runtime
|
53
53
|
prerelease: false
|
54
54
|
version_requirements: !ruby/object:Gem::Requirement
|
55
55
|
requirements:
|
56
|
-
- - ~>
|
56
|
+
- - "~>"
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '1.4'
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 1.4.3
|
62
62
|
- !ruby/object:Gem::Dependency
|
@@ -73,60 +73,94 @@ dependencies:
|
|
73
73
|
- - '='
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: 0.0.2
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: threach
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.2.0
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.2.0
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: bindeps
|
92
|
+
requirement: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.0'
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 0.0.7
|
100
|
+
type: :runtime
|
101
|
+
prerelease: false
|
102
|
+
version_requirements: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - "~>"
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0.0'
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 0.0.7
|
76
110
|
- !ruby/object:Gem::Dependency
|
77
111
|
name: turn
|
78
112
|
requirement: !ruby/object:Gem::Requirement
|
79
113
|
requirements:
|
80
|
-
- -
|
114
|
+
- - ">="
|
81
115
|
- !ruby/object:Gem::Version
|
82
116
|
version: '0'
|
83
117
|
type: :development
|
84
118
|
prerelease: false
|
85
119
|
version_requirements: !ruby/object:Gem::Requirement
|
86
120
|
requirements:
|
87
|
-
- -
|
121
|
+
- - ">="
|
88
122
|
- !ruby/object:Gem::Version
|
89
123
|
version: '0'
|
90
124
|
- !ruby/object:Gem::Dependency
|
91
125
|
name: simplecov
|
92
126
|
requirement: !ruby/object:Gem::Requirement
|
93
127
|
requirements:
|
94
|
-
- -
|
128
|
+
- - ">="
|
95
129
|
- !ruby/object:Gem::Version
|
96
130
|
version: '0'
|
97
131
|
type: :development
|
98
132
|
prerelease: false
|
99
133
|
version_requirements: !ruby/object:Gem::Requirement
|
100
134
|
requirements:
|
101
|
-
- -
|
135
|
+
- - ">="
|
102
136
|
- !ruby/object:Gem::Version
|
103
137
|
version: '0'
|
104
138
|
- !ruby/object:Gem::Dependency
|
105
139
|
name: shoulda-context
|
106
140
|
requirement: !ruby/object:Gem::Requirement
|
107
141
|
requirements:
|
108
|
-
- -
|
142
|
+
- - ">="
|
109
143
|
- !ruby/object:Gem::Version
|
110
144
|
version: '0'
|
111
145
|
type: :development
|
112
146
|
prerelease: false
|
113
147
|
version_requirements: !ruby/object:Gem::Requirement
|
114
148
|
requirements:
|
115
|
-
- -
|
149
|
+
- - ">="
|
116
150
|
- !ruby/object:Gem::Version
|
117
151
|
version: '0'
|
118
152
|
- !ruby/object:Gem::Dependency
|
119
153
|
name: coveralls
|
120
154
|
requirement: !ruby/object:Gem::Requirement
|
121
155
|
requirements:
|
122
|
-
- -
|
156
|
+
- - ">="
|
123
157
|
- !ruby/object:Gem::Version
|
124
158
|
version: 0.6.7
|
125
159
|
type: :development
|
126
160
|
prerelease: false
|
127
161
|
version_requirements: !ruby/object:Gem::Requirement
|
128
162
|
requirements:
|
129
|
-
- -
|
163
|
+
- - ">="
|
130
164
|
- !ruby/object:Gem::Version
|
131
165
|
version: 0.6.7
|
132
166
|
description: See summary
|
@@ -136,9 +170,9 @@ executables:
|
|
136
170
|
extensions: []
|
137
171
|
extra_rdoc_files: []
|
138
172
|
files:
|
173
|
+
- bin/crb-blast
|
139
174
|
- lib/crb-blast.rb
|
140
175
|
- lib/hit.rb
|
141
|
-
- bin/crb-blast
|
142
176
|
homepage: http://rubygems.org/gems/crb-blast
|
143
177
|
licenses:
|
144
178
|
- MIT
|
@@ -149,19 +183,18 @@ require_paths:
|
|
149
183
|
- lib
|
150
184
|
required_ruby_version: !ruby/object:Gem::Requirement
|
151
185
|
requirements:
|
152
|
-
- -
|
186
|
+
- - ">="
|
153
187
|
- !ruby/object:Gem::Version
|
154
188
|
version: '0'
|
155
189
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
190
|
requirements:
|
157
|
-
- -
|
191
|
+
- - ">="
|
158
192
|
- !ruby/object:Gem::Version
|
159
193
|
version: '0'
|
160
194
|
requirements: []
|
161
195
|
rubyforge_project:
|
162
|
-
rubygems_version: 2.
|
196
|
+
rubygems_version: 2.2.2
|
163
197
|
signing_key:
|
164
198
|
specification_version: 4
|
165
199
|
summary: Run conditional reciprocal best blast
|
166
200
|
test_files: []
|
167
|
-
has_rdoc:
|