crb-blast 0.2 → 0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/bin/crb-blast +11 -2
- data/lib/crb-blast.rb +119 -28
- metadata +55 -22
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: fb3b123812929285b57903de865b981104ffdfe9
|
4
|
+
data.tar.gz: d66187a8a9b08b4bb282d03ed75898f1f5789af8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 260463b1df616c63dbd262fb5d0e205610d221c4d81fdf0216e28fdcf8fde7d7f821c2e10d80edec969d5d9b64288823e169d06f7235abbca7553f1a633e9fd2
|
7
|
+
data.tar.gz: cde2ffeb8b7efe84d6ca09f99358dc5dd7cf77f193dfc91669af7a9ba54e66712f085c9dbe26d45f7abe2e01a96f453568198b1516fedcecf2ee0f3cefcf6c70
|
data/bin/crb-blast
CHANGED
@@ -6,11 +6,12 @@
|
|
6
6
|
|
7
7
|
require 'trollop'
|
8
8
|
require 'crb-blast'
|
9
|
+
require 'bindeps'
|
9
10
|
|
10
11
|
opts = Trollop::options do
|
11
12
|
banner <<-EOS
|
12
13
|
|
13
|
-
CRB-Blast v0.
|
14
|
+
CRB-Blast v0.3 by Chris Boursnell <cmb211@cam.ac.uk>
|
14
15
|
|
15
16
|
Conditional Reciprocal Best BLAST
|
16
17
|
|
@@ -44,14 +45,22 @@ EOS
|
|
44
45
|
"output file as tsv",
|
45
46
|
:required => true,
|
46
47
|
:type => String
|
48
|
+
|
49
|
+
opt :split,
|
50
|
+
"split the fasta files into chunks and run multiple blast jobs and then"+
|
51
|
+
"combine them."
|
47
52
|
end
|
48
53
|
|
49
54
|
Trollop::die :query, "must exist" if !File.exist?(opts[:query])
|
50
55
|
Trollop::die :target, "must exist" if !File.exist?(opts[:target])
|
51
56
|
|
57
|
+
gem_dir = Gem.loaded_specs['crb-blast'].full_gem_path
|
58
|
+
gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
59
|
+
Bindeps.require gem_deps
|
60
|
+
|
52
61
|
blaster = CRB_Blast.new(opts.query, opts.target)
|
53
62
|
dbs = blaster.makedb
|
54
|
-
run = blaster.run_blast(opts.evalue, opts.threads)
|
63
|
+
run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
|
55
64
|
load = blaster.load_outputs
|
56
65
|
recips = blaster.find_reciprocals
|
57
66
|
secondaries = blaster.find_secondaries
|
data/lib/crb-blast.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'bio'
|
4
4
|
require 'which'
|
5
5
|
require 'hit'
|
6
|
-
require '
|
6
|
+
require 'threach'
|
7
7
|
|
8
8
|
class Bio::FastaFormat
|
9
9
|
def isNucl?
|
@@ -119,51 +119,142 @@ class CRB_Blast
|
|
119
119
|
[@query_name, @target_name]
|
120
120
|
end
|
121
121
|
|
122
|
-
def run_blast(evalue, threads)
|
122
|
+
def run_blast(evalue, threads, split)
|
123
123
|
if @databases
|
124
124
|
@output1 = "#{@working_dir}/#{query_name}_into_#{target_name}.1.blast"
|
125
125
|
@output2 = "#{@working_dir}/#{target_name}_into_#{query_name}.2.blast"
|
126
|
-
cmd1=""
|
127
|
-
cmd2=""
|
128
126
|
if @query_is_prot
|
129
127
|
if @target_is_prot
|
130
|
-
|
131
|
-
|
128
|
+
bin1 = "#{@blastp_path} "
|
129
|
+
bin2 = "#{@blastp_path} "
|
132
130
|
else
|
133
|
-
|
134
|
-
|
131
|
+
bin1 = "#{@tblastn_path} "
|
132
|
+
bin2 = "#{@blastx_path} "
|
135
133
|
end
|
136
134
|
else
|
137
135
|
if @target_is_prot
|
138
|
-
|
139
|
-
|
136
|
+
bin1 = "#{@blastx_path} "
|
137
|
+
bin2 = "#{@tblastn_path} "
|
140
138
|
else
|
141
|
-
|
142
|
-
|
139
|
+
bin1 = "#{@blastn_path} "
|
140
|
+
bin2 = "#{@blastn_path} "
|
143
141
|
end
|
144
142
|
end
|
145
|
-
|
146
|
-
|
143
|
+
if split and threads > 1
|
144
|
+
run_blast_with_splitting evalue, threads, bin1, bin2
|
145
|
+
else
|
146
|
+
run_blast_with_threads evalue, threads, bin1, bin2
|
147
|
+
end
|
148
|
+
return true
|
149
|
+
else
|
150
|
+
return false
|
151
|
+
end
|
152
|
+
end
|
153
|
+
|
154
|
+
def run_blast_with_threads evalue, threads, bin1, bin2
|
155
|
+
# puts "running blast with #{threads} threads"
|
156
|
+
cmd1 = "#{bin1} -query #{@query} -db #{@working_dir}/#{@target_name} "
|
157
|
+
cmd1 << " -out #{@output1} -evalue #{evalue} "
|
158
|
+
cmd1 << " -outfmt \"6 std qlen slen\" "
|
159
|
+
cmd1 << " -max_target_seqs 50 "
|
160
|
+
cmd1 << " -num_threads #{threads}"
|
161
|
+
|
162
|
+
cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
|
163
|
+
cmd2 << " -out #{@output2} -evalue #{evalue} "
|
164
|
+
cmd2 << " -outfmt \"6 std qlen slen\" "
|
165
|
+
cmd2 << " -max_target_seqs 50 "
|
166
|
+
cmd2 << " -num_threads #{threads}"
|
167
|
+
if !File.exist?("#{@output1}")
|
168
|
+
`#{cmd1}`
|
169
|
+
end
|
170
|
+
|
171
|
+
if !File.exist?("#{@output2}")
|
172
|
+
`#{cmd2}`
|
173
|
+
end
|
174
|
+
end
|
175
|
+
|
176
|
+
def run_blast_with_splitting evalue, threads, bin1, bin2
|
177
|
+
# puts "running blast by splitting input into #{threads} pieces"
|
178
|
+
blasts=[]
|
179
|
+
files = split_input(@query, threads)
|
180
|
+
files.threach(threads) do |thread|
|
181
|
+
cmd1 = "#{bin1} -query #{thread} -db #{@working_dir}/#{@target_name} "
|
182
|
+
cmd1 << " -out #{thread}.blast -evalue #{evalue} "
|
147
183
|
cmd1 << " -outfmt \"6 std qlen slen\" "
|
148
184
|
cmd1 << " -max_target_seqs 50 "
|
149
|
-
cmd1 << " -num_threads
|
185
|
+
cmd1 << " -num_threads 1"
|
186
|
+
if !File.exists?("#{thread}.blast")
|
187
|
+
`#{cmd1}`
|
188
|
+
end
|
189
|
+
blasts << "#{thread}.blast"
|
190
|
+
end
|
191
|
+
cat_cmd = "cat "
|
192
|
+
cat_cmd << blasts.join(" ")
|
193
|
+
cat_cmd << " > #{@output1}"
|
194
|
+
`#{cat_cmd}`
|
195
|
+
blasts.each do |b|
|
196
|
+
File.delete(b) # delete intermediate blast output files
|
197
|
+
end
|
150
198
|
|
151
|
-
|
152
|
-
|
199
|
+
blasts=[]
|
200
|
+
files = split_input(@target, threads)
|
201
|
+
files.threach(threads) do |thread|
|
202
|
+
cmd2 = "#{bin2} -query #{thread} -db #{@working_dir}/#{@query_name} "
|
203
|
+
cmd2 << " -out #{thread}.blast -evalue #{evalue} "
|
153
204
|
cmd2 << " -outfmt \"6 std qlen slen\" "
|
154
205
|
cmd2 << " -max_target_seqs 50 "
|
155
|
-
cmd2 << " -num_threads
|
156
|
-
|
157
|
-
if !File.exists?("#{@output1}")
|
158
|
-
`#{cmd1}`
|
159
|
-
end
|
160
|
-
if !File.exists?("#{@output2}")
|
206
|
+
cmd2 << " -num_threads 1"
|
207
|
+
if !File.exists?("#{thread}.blast")
|
161
208
|
`#{cmd2}`
|
162
209
|
end
|
163
|
-
|
164
|
-
|
165
|
-
|
210
|
+
blasts << "#{thread}.blast"
|
211
|
+
end
|
212
|
+
cat_cmd = "cat "
|
213
|
+
cat_cmd << blasts.join(" ")
|
214
|
+
cat_cmd << " > #{@output2}"
|
215
|
+
`#{cat_cmd}`
|
216
|
+
blasts.each do |b|
|
217
|
+
File.delete(b) # delete intermediate blast output files
|
218
|
+
end
|
219
|
+
|
220
|
+
end
|
221
|
+
|
222
|
+
def split_input filename, pieces
|
223
|
+
input = {}
|
224
|
+
name = nil
|
225
|
+
seq=""
|
226
|
+
File.open(filename).each_line do |line|
|
227
|
+
if line =~ /^>(.*)$/
|
228
|
+
if name
|
229
|
+
input[name]=seq
|
230
|
+
seq=""
|
231
|
+
end
|
232
|
+
name = $1
|
233
|
+
else
|
234
|
+
seq << line.chomp
|
235
|
+
end
|
236
|
+
end
|
237
|
+
input[name]=seq
|
238
|
+
# construct list of output file handles
|
239
|
+
outputs=[]
|
240
|
+
output_files=[]
|
241
|
+
pieces.times do |n|
|
242
|
+
outfile = "#{filename}_chunk_#{n}.fasta"
|
243
|
+
outputs[n] = File.open("#{outfile}", "w")
|
244
|
+
output_files[n] = "#{outfile}"
|
245
|
+
end
|
246
|
+
# write sequences
|
247
|
+
count=0
|
248
|
+
input.each_pair do |name, seq|
|
249
|
+
outputs[count].write(">#{name}\n")
|
250
|
+
outputs[count].write("#{seq}\n")
|
251
|
+
count += 1
|
252
|
+
count %= pieces
|
253
|
+
end
|
254
|
+
outputs.each do |out|
|
255
|
+
out.close
|
166
256
|
end
|
257
|
+
output_files
|
167
258
|
end
|
168
259
|
|
169
260
|
def load_outputs
|
@@ -312,9 +403,9 @@ class CRB_Blast
|
|
312
403
|
@target_results = nil
|
313
404
|
end
|
314
405
|
|
315
|
-
def run evalue, threads
|
406
|
+
def run evalue, threads, split
|
316
407
|
makedb
|
317
|
-
run_blast evalue, threads
|
408
|
+
run_blast evalue, threads, split
|
318
409
|
load_outputs
|
319
410
|
find_reciprocals
|
320
411
|
find_secondaries
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: crb-blast
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.2'
|
4
|
+
version: '0.3'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Chris Boursnell
|
@@ -15,48 +15,48 @@ dependencies:
|
|
15
15
|
name: trollop
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
|
-
- -
|
18
|
+
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
20
|
version: '0'
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
|
-
- -
|
25
|
+
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
27
|
version: '0'
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: rake
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
31
31
|
requirements:
|
32
|
-
- -
|
32
|
+
- - ">="
|
33
33
|
- !ruby/object:Gem::Version
|
34
34
|
version: '0'
|
35
35
|
type: :runtime
|
36
36
|
prerelease: false
|
37
37
|
version_requirements: !ruby/object:Gem::Requirement
|
38
38
|
requirements:
|
39
|
-
- -
|
39
|
+
- - ">="
|
40
40
|
- !ruby/object:Gem::Version
|
41
41
|
version: '0'
|
42
42
|
- !ruby/object:Gem::Dependency
|
43
43
|
name: bio
|
44
44
|
requirement: !ruby/object:Gem::Requirement
|
45
45
|
requirements:
|
46
|
-
- - ~>
|
46
|
+
- - "~>"
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '1.4'
|
49
|
-
- -
|
49
|
+
- - ">="
|
50
50
|
- !ruby/object:Gem::Version
|
51
51
|
version: 1.4.3
|
52
52
|
type: :runtime
|
53
53
|
prerelease: false
|
54
54
|
version_requirements: !ruby/object:Gem::Requirement
|
55
55
|
requirements:
|
56
|
-
- - ~>
|
56
|
+
- - "~>"
|
57
57
|
- !ruby/object:Gem::Version
|
58
58
|
version: '1.4'
|
59
|
-
- -
|
59
|
+
- - ">="
|
60
60
|
- !ruby/object:Gem::Version
|
61
61
|
version: 1.4.3
|
62
62
|
- !ruby/object:Gem::Dependency
|
@@ -73,60 +73,94 @@ dependencies:
|
|
73
73
|
- - '='
|
74
74
|
- !ruby/object:Gem::Version
|
75
75
|
version: 0.0.2
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: threach
|
78
|
+
requirement: !ruby/object:Gem::Requirement
|
79
|
+
requirements:
|
80
|
+
- - "~>"
|
81
|
+
- !ruby/object:Gem::Version
|
82
|
+
version: 0.2.0
|
83
|
+
type: :runtime
|
84
|
+
prerelease: false
|
85
|
+
version_requirements: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - "~>"
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: 0.2.0
|
90
|
+
- !ruby/object:Gem::Dependency
|
91
|
+
name: bindeps
|
92
|
+
requirement: !ruby/object:Gem::Requirement
|
93
|
+
requirements:
|
94
|
+
- - "~>"
|
95
|
+
- !ruby/object:Gem::Version
|
96
|
+
version: '0.0'
|
97
|
+
- - ">="
|
98
|
+
- !ruby/object:Gem::Version
|
99
|
+
version: 0.0.7
|
100
|
+
type: :runtime
|
101
|
+
prerelease: false
|
102
|
+
version_requirements: !ruby/object:Gem::Requirement
|
103
|
+
requirements:
|
104
|
+
- - "~>"
|
105
|
+
- !ruby/object:Gem::Version
|
106
|
+
version: '0.0'
|
107
|
+
- - ">="
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: 0.0.7
|
76
110
|
- !ruby/object:Gem::Dependency
|
77
111
|
name: turn
|
78
112
|
requirement: !ruby/object:Gem::Requirement
|
79
113
|
requirements:
|
80
|
-
- -
|
114
|
+
- - ">="
|
81
115
|
- !ruby/object:Gem::Version
|
82
116
|
version: '0'
|
83
117
|
type: :development
|
84
118
|
prerelease: false
|
85
119
|
version_requirements: !ruby/object:Gem::Requirement
|
86
120
|
requirements:
|
87
|
-
- -
|
121
|
+
- - ">="
|
88
122
|
- !ruby/object:Gem::Version
|
89
123
|
version: '0'
|
90
124
|
- !ruby/object:Gem::Dependency
|
91
125
|
name: simplecov
|
92
126
|
requirement: !ruby/object:Gem::Requirement
|
93
127
|
requirements:
|
94
|
-
- -
|
128
|
+
- - ">="
|
95
129
|
- !ruby/object:Gem::Version
|
96
130
|
version: '0'
|
97
131
|
type: :development
|
98
132
|
prerelease: false
|
99
133
|
version_requirements: !ruby/object:Gem::Requirement
|
100
134
|
requirements:
|
101
|
-
- -
|
135
|
+
- - ">="
|
102
136
|
- !ruby/object:Gem::Version
|
103
137
|
version: '0'
|
104
138
|
- !ruby/object:Gem::Dependency
|
105
139
|
name: shoulda-context
|
106
140
|
requirement: !ruby/object:Gem::Requirement
|
107
141
|
requirements:
|
108
|
-
- -
|
142
|
+
- - ">="
|
109
143
|
- !ruby/object:Gem::Version
|
110
144
|
version: '0'
|
111
145
|
type: :development
|
112
146
|
prerelease: false
|
113
147
|
version_requirements: !ruby/object:Gem::Requirement
|
114
148
|
requirements:
|
115
|
-
- -
|
149
|
+
- - ">="
|
116
150
|
- !ruby/object:Gem::Version
|
117
151
|
version: '0'
|
118
152
|
- !ruby/object:Gem::Dependency
|
119
153
|
name: coveralls
|
120
154
|
requirement: !ruby/object:Gem::Requirement
|
121
155
|
requirements:
|
122
|
-
- -
|
156
|
+
- - ">="
|
123
157
|
- !ruby/object:Gem::Version
|
124
158
|
version: 0.6.7
|
125
159
|
type: :development
|
126
160
|
prerelease: false
|
127
161
|
version_requirements: !ruby/object:Gem::Requirement
|
128
162
|
requirements:
|
129
|
-
- -
|
163
|
+
- - ">="
|
130
164
|
- !ruby/object:Gem::Version
|
131
165
|
version: 0.6.7
|
132
166
|
description: See summary
|
@@ -136,9 +170,9 @@ executables:
|
|
136
170
|
extensions: []
|
137
171
|
extra_rdoc_files: []
|
138
172
|
files:
|
173
|
+
- bin/crb-blast
|
139
174
|
- lib/crb-blast.rb
|
140
175
|
- lib/hit.rb
|
141
|
-
- bin/crb-blast
|
142
176
|
homepage: http://rubygems.org/gems/crb-blast
|
143
177
|
licenses:
|
144
178
|
- MIT
|
@@ -149,19 +183,18 @@ require_paths:
|
|
149
183
|
- lib
|
150
184
|
required_ruby_version: !ruby/object:Gem::Requirement
|
151
185
|
requirements:
|
152
|
-
- -
|
186
|
+
- - ">="
|
153
187
|
- !ruby/object:Gem::Version
|
154
188
|
version: '0'
|
155
189
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
156
190
|
requirements:
|
157
|
-
- -
|
191
|
+
- - ">="
|
158
192
|
- !ruby/object:Gem::Version
|
159
193
|
version: '0'
|
160
194
|
requirements: []
|
161
195
|
rubyforge_project:
|
162
|
-
rubygems_version: 2.
|
196
|
+
rubygems_version: 2.2.2
|
163
197
|
signing_key:
|
164
198
|
specification_version: 4
|
165
199
|
summary: Run conditional reciprocal best blast
|
166
200
|
test_files: []
|
167
|
-
has_rdoc:
|