crb-blast 0.6.6 → 0.6.8
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/bin/crb-blast +19 -0
- data/lib/crb-blast/crb-blast.rb +35 -4
- data/lib/crb-blast/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0667a895d64d87a7ec7078342b9449d185d5fc8c
|
4
|
+
data.tar.gz: a0b4d4ffc83573e180beb7c84e778d21c3250155
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c08e2203df9f0a077a14373b3e7f6a7b1b8d5281a7e55754448befe31af46f1247d66a7251c402610214f029f95ad02ae62074b431a9aefbc03d62b4eb8a5dd9
|
7
|
+
data.tar.gz: 8233a2eab64a5fceb841d601904b570eb160de41a6ddeeba5ea80a669823d3da9b15d9e0b095e37d6b47e09ae43cd1261028d33f4037d56dea6c9f91fdcc37e1
|
data/README.md
CHANGED
@@ -82,6 +82,23 @@ blaster.find_reciprocals
|
|
82
82
|
blaster.find_secondaries
|
83
83
|
```
|
84
84
|
|
85
|
+
### Output
|
86
|
+
|
87
|
+
The CRB-Blast output file contains the following columns, taken from the blast output:
|
88
|
+
|
89
|
+
```
|
90
|
+
query - the name of the transcript from the 'query' fasta file
|
91
|
+
target - the name of the transcript from the 'target' fasta file
|
92
|
+
id - the percent sequence identity
|
93
|
+
alnlen - the alignment length
|
94
|
+
evalue - the blast evalue
|
95
|
+
bitscore - the blast bitscore
|
96
|
+
qstart..qend - the start and end coordinates of the alignment in the query
|
97
|
+
tstart..tend - the start and end coordinates of the alignment in the target
|
98
|
+
qlen - the length of the query transcript
|
99
|
+
tlen - the length of the target transcript
|
100
|
+
```
|
101
|
+
|
85
102
|
### Getting help
|
86
103
|
|
87
104
|
Please use the issue tracker if you find bugs or have trouble running CRB-BLAST.
|
data/bin/crb-blast
CHANGED
@@ -50,6 +50,9 @@ EOS
|
|
50
50
|
opt :split,
|
51
51
|
"split the fasta files into chunks and run multiple blast jobs and then"+
|
52
52
|
" combine them."
|
53
|
+
|
54
|
+
opt :verbose,
|
55
|
+
"be verbose"
|
53
56
|
end
|
54
57
|
|
55
58
|
Trollop::die :query, "must exist" if !File.exist?(opts[:query])
|
@@ -60,12 +63,27 @@ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
|
60
63
|
Bindeps.require gem_deps
|
61
64
|
|
62
65
|
blaster = CRB_Blast::CRB_Blast.new(opts.query, opts.target)
|
66
|
+
print "Making blast databases..." if opts.verbose
|
63
67
|
dbs = blaster.makedb
|
68
|
+
puts " Done" if opts.verbose
|
69
|
+
|
70
|
+
print "Blasting..." if opts.verbose
|
64
71
|
run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
|
72
|
+
puts " Done" if opts.verbose
|
73
|
+
|
74
|
+
print "Loading..." if opts.verbose
|
65
75
|
load = blaster.load_outputs
|
76
|
+
puts " Done" if opts.verbose
|
77
|
+
|
78
|
+
print "Finding reciprocals..." if opts.verbose
|
66
79
|
recips = blaster.find_reciprocals
|
80
|
+
puts " Done" if opts.verbose
|
81
|
+
|
82
|
+
print "Fitting curve..." if opts.verbose
|
67
83
|
secondaries = blaster.find_secondaries
|
84
|
+
puts " Done" if opts.verbose
|
68
85
|
|
86
|
+
print "Writing output..." if opts.verbose
|
69
87
|
File.open("#{opts.output}", 'w') do |out|
|
70
88
|
blaster.reciprocals.each_pair do |query_id, hits|
|
71
89
|
hits.each do |hit|
|
@@ -73,3 +91,4 @@ File.open("#{opts.output}", 'w') do |out|
|
|
73
91
|
end
|
74
92
|
end
|
75
93
|
end
|
94
|
+
puts " Done" if opts.verbose
|
data/lib/crb-blast/crb-blast.rb
CHANGED
@@ -24,6 +24,7 @@ module CRB_Blast
|
|
24
24
|
attr_accessor :missed
|
25
25
|
attr_accessor :target_is_prot, :query_is_prot
|
26
26
|
attr_accessor :query_results, :target_results, :working_dir
|
27
|
+
attr_reader :reciprocal_hits
|
27
28
|
|
28
29
|
def initialize query, target, output=nil
|
29
30
|
raise IOError.new("File not found #{query}") if !File.exist?(query)
|
@@ -189,12 +190,22 @@ module CRB_Blast
|
|
189
190
|
cmd1 << " -out #{@output1} -evalue #{evalue} "
|
190
191
|
cmd1 << " -outfmt \"6 std qlen slen\" "
|
191
192
|
cmd1 << " -max_target_seqs 50 "
|
193
|
+
if bin1=="blastn"
|
194
|
+
cmd1 << " -dust no "
|
195
|
+
elsif bin1=~/blastx/ or bin1=~/blastp/ or bin1=~/tblastn/
|
196
|
+
cmd1 << " -seg no "
|
197
|
+
end
|
192
198
|
cmd1 << " -num_threads #{threads}"
|
193
199
|
|
194
200
|
cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
|
195
201
|
cmd2 << " -out #{@output2} -evalue #{evalue} "
|
196
202
|
cmd2 << " -outfmt \"6 std qlen slen\" "
|
197
203
|
cmd2 << " -max_target_seqs 50 "
|
204
|
+
if bin2=="blastn"
|
205
|
+
cmd2 << " -dust no "
|
206
|
+
elsif bin2=~/blastx/ or bin2=~/blastp/ or bin2=~/tblastn/
|
207
|
+
cmd2 << " -seg no "
|
208
|
+
end
|
198
209
|
cmd2 << " -num_threads #{threads}"
|
199
210
|
if !File.exist?("#{@output1}")
|
200
211
|
blast1 = Cmd.new(cmd1)
|
@@ -432,9 +443,12 @@ module CRB_Blast
|
|
432
443
|
else
|
433
444
|
length_hash = Hash.new
|
434
445
|
fitting = Hash.new
|
435
|
-
|
436
|
-
|
437
|
-
|
446
|
+
File.open("evalues_data", "w") do |io|
|
447
|
+
@evalues.each do |h|
|
448
|
+
length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
|
449
|
+
length_hash[h[:length]] << h
|
450
|
+
io.write "#{h[:length]}\t#{h[:e]}\n"
|
451
|
+
end
|
438
452
|
end
|
439
453
|
|
440
454
|
(10..@longest).each do |centre|
|
@@ -453,9 +467,24 @@ module CRB_Blast
|
|
453
467
|
end
|
454
468
|
if count>0
|
455
469
|
mean = e/count
|
456
|
-
fitting[centre]
|
470
|
+
if fitting[centre-1]
|
471
|
+
if fitting[centre-1] > mean # monotonic fitting
|
472
|
+
fitting[centre] = fitting[centre-1]
|
473
|
+
else
|
474
|
+
fitting[centre] = mean
|
475
|
+
end
|
476
|
+
else
|
477
|
+
fitting[centre] = mean
|
478
|
+
end
|
479
|
+
end
|
480
|
+
end
|
481
|
+
# output fitting data
|
482
|
+
File.open("fitting_data", "w") do |io|
|
483
|
+
fitting.each do |centre, mean|
|
484
|
+
io.write "#{centre}\t#{mean}\n"
|
457
485
|
end
|
458
486
|
end
|
487
|
+
#
|
459
488
|
hits = 0
|
460
489
|
@missed.each_pair do |id, list|
|
461
490
|
list.each do |hit|
|
@@ -531,6 +560,8 @@ module CRB_Blast
|
|
531
560
|
def write_output
|
532
561
|
s=""
|
533
562
|
unless @reciprocals.nil?
|
563
|
+
s << "query\ttarget\tid\talnlen\tevalue\tbitscore\t"
|
564
|
+
s << "qstart..qend\ttstart..tend\tqlen\ttlen\n"
|
534
565
|
@reciprocals.each_pair do |query_id, hits|
|
535
566
|
hits.each do |hit|
|
536
567
|
s << "#{hit}\n"
|
data/lib/crb-blast/version.rb
CHANGED