crb-blast 0.6.6 → 0.6.8
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +17 -0
- data/bin/crb-blast +19 -0
- data/lib/crb-blast/crb-blast.rb +35 -4
- data/lib/crb-blast/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0667a895d64d87a7ec7078342b9449d185d5fc8c
|
4
|
+
data.tar.gz: a0b4d4ffc83573e180beb7c84e778d21c3250155
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c08e2203df9f0a077a14373b3e7f6a7b1b8d5281a7e55754448befe31af46f1247d66a7251c402610214f029f95ad02ae62074b431a9aefbc03d62b4eb8a5dd9
|
7
|
+
data.tar.gz: 8233a2eab64a5fceb841d601904b570eb160de41a6ddeeba5ea80a669823d3da9b15d9e0b095e37d6b47e09ae43cd1261028d33f4037d56dea6c9f91fdcc37e1
|
data/README.md
CHANGED
@@ -82,6 +82,23 @@ blaster.find_reciprocals
|
|
82
82
|
blaster.find_secondaries
|
83
83
|
```
|
84
84
|
|
85
|
+
### Output
|
86
|
+
|
87
|
+
The CRB-Blast output file contains the following columns, which are taken from the blast output:
|
88
|
+
|
89
|
+
```
|
90
|
+
query - the name of the transcript from the 'query' fasta file
|
91
|
+
target - the name of the transcript from the 'target' fasta file
|
92
|
+
id - the percent sequence identity
|
93
|
+
alnlen - the alignment length
|
94
|
+
evalue - the blast evalue
|
95
|
+
bitscore - the blast bitscore
|
96
|
+
qstart..qend - the coordinates of the alignment in the query from start to end
|
97
|
+
tstart..tend - the coordinates of the alignment in the target from start to end
|
98
|
+
qlen - the length of the query transcript
|
99
|
+
tlen - the length of the target transcript
|
100
|
+
```
|
101
|
+
|
85
102
|
### Getting help
|
86
103
|
|
87
104
|
Please use the issue tracker if you find bugs or have trouble running CRB-BLAST.
|
data/bin/crb-blast
CHANGED
@@ -50,6 +50,9 @@ EOS
|
|
50
50
|
opt :split,
|
51
51
|
"split the fasta files into chunks and run multiple blast jobs and then"+
|
52
52
|
" combine them."
|
53
|
+
|
54
|
+
opt :verbose,
|
55
|
+
"be verbose"
|
53
56
|
end
|
54
57
|
|
55
58
|
Trollop::die :query, "must exist" if !File.exist?(opts[:query])
|
@@ -60,12 +63,27 @@ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
|
|
60
63
|
Bindeps.require gem_deps
|
61
64
|
|
62
65
|
blaster = CRB_Blast::CRB_Blast.new(opts.query, opts.target)
|
66
|
+
print "Making blast databases..." if opts.verbose
|
63
67
|
dbs = blaster.makedb
|
68
|
+
puts " Done" if opts.verbose
|
69
|
+
|
70
|
+
print "Blasting..." if opts.verbose
|
64
71
|
run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
|
72
|
+
puts " Done" if opts.verbose
|
73
|
+
|
74
|
+
print "Loading..." if opts.verbose
|
65
75
|
load = blaster.load_outputs
|
76
|
+
puts " Done" if opts.verbose
|
77
|
+
|
78
|
+
print "Finding reciprocals..." if opts.verbose
|
66
79
|
recips = blaster.find_reciprocals
|
80
|
+
puts " Done" if opts.verbose
|
81
|
+
|
82
|
+
print "Fitting curve..." if opts.verbose
|
67
83
|
secondaries = blaster.find_secondaries
|
84
|
+
puts " Done" if opts.verbose
|
68
85
|
|
86
|
+
print "Writing output..." if opts.verbose
|
69
87
|
File.open("#{opts.output}", 'w') do |out|
|
70
88
|
blaster.reciprocals.each_pair do |query_id, hits|
|
71
89
|
hits.each do |hit|
|
@@ -73,3 +91,4 @@ File.open("#{opts.output}", 'w') do |out|
|
|
73
91
|
end
|
74
92
|
end
|
75
93
|
end
|
94
|
+
puts " Done" if opts.verbose
|
data/lib/crb-blast/crb-blast.rb
CHANGED
@@ -24,6 +24,7 @@ module CRB_Blast
|
|
24
24
|
attr_accessor :missed
|
25
25
|
attr_accessor :target_is_prot, :query_is_prot
|
26
26
|
attr_accessor :query_results, :target_results, :working_dir
|
27
|
+
attr_reader :reciprocal_hits
|
27
28
|
|
28
29
|
def initialize query, target, output=nil
|
29
30
|
raise IOError.new("File not found #{query}") if !File.exist?(query)
|
@@ -189,12 +190,22 @@ module CRB_Blast
|
|
189
190
|
cmd1 << " -out #{@output1} -evalue #{evalue} "
|
190
191
|
cmd1 << " -outfmt \"6 std qlen slen\" "
|
191
192
|
cmd1 << " -max_target_seqs 50 "
|
193
|
+
if bin1=="blastn"
|
194
|
+
cmd1 << " -dust no "
|
195
|
+
elsif bin1=~/blastx/ or bin1=~/blastp/ or bin1=~/tblastn/
|
196
|
+
cmd1 << " -seg no "
|
197
|
+
end
|
192
198
|
cmd1 << " -num_threads #{threads}"
|
193
199
|
|
194
200
|
cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
|
195
201
|
cmd2 << " -out #{@output2} -evalue #{evalue} "
|
196
202
|
cmd2 << " -outfmt \"6 std qlen slen\" "
|
197
203
|
cmd2 << " -max_target_seqs 50 "
|
204
|
+
if bin2=="blastn"
|
205
|
+
cmd2 << " -dust no "
|
206
|
+
elsif bin2=~/blastx/ or bin2=~/blastp/ or bin2=~/tblastn/
|
207
|
+
cmd2 << " -seg no "
|
208
|
+
end
|
198
209
|
cmd2 << " -num_threads #{threads}"
|
199
210
|
if !File.exist?("#{@output1}")
|
200
211
|
blast1 = Cmd.new(cmd1)
|
@@ -432,9 +443,12 @@ module CRB_Blast
|
|
432
443
|
else
|
433
444
|
length_hash = Hash.new
|
434
445
|
fitting = Hash.new
|
435
|
-
|
436
|
-
|
437
|
-
|
446
|
+
File.open("evalues_data", "w") do |io|
|
447
|
+
@evalues.each do |h|
|
448
|
+
length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
|
449
|
+
length_hash[h[:length]] << h
|
450
|
+
io.write "#{h[:length]}\t#{h[:e]}\n"
|
451
|
+
end
|
438
452
|
end
|
439
453
|
|
440
454
|
(10..@longest).each do |centre|
|
@@ -453,9 +467,24 @@ module CRB_Blast
|
|
453
467
|
end
|
454
468
|
if count>0
|
455
469
|
mean = e/count
|
456
|
-
fitting[centre]
|
470
|
+
if fitting[centre-1]
|
471
|
+
if fitting[centre-1] > mean # monotonic fitting
|
472
|
+
fitting[centre] = fitting[centre-1]
|
473
|
+
else
|
474
|
+
fitting[centre] = mean
|
475
|
+
end
|
476
|
+
else
|
477
|
+
fitting[centre] = mean
|
478
|
+
end
|
479
|
+
end
|
480
|
+
end
|
481
|
+
# output fitting data
|
482
|
+
File.open("fitting_data", "w") do |io|
|
483
|
+
fitting.each do |centre, mean|
|
484
|
+
io.write "#{centre}\t#{mean}\n"
|
457
485
|
end
|
458
486
|
end
|
487
|
+
#
|
459
488
|
hits = 0
|
460
489
|
@missed.each_pair do |id, list|
|
461
490
|
list.each do |hit|
|
@@ -531,6 +560,8 @@ module CRB_Blast
|
|
531
560
|
def write_output
|
532
561
|
s=""
|
533
562
|
unless @reciprocals.nil?
|
563
|
+
s << "query\ttarget\tid\talnlen\tevalue\tbitscore\t"
|
564
|
+
s << "qstart..qend\ttstart..tend\tqlen\ttlen\n"
|
534
565
|
@reciprocals.each_pair do |query_id, hits|
|
535
566
|
hits.each do |hit|
|
536
567
|
s << "#{hit}\n"
|
data/lib/crb-blast/version.rb
CHANGED