crb-blast 0.6.6 → 0.6.8

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 275e3d2996f91648e9f046cd7a08c4c663243765
4
- data.tar.gz: 4de455ec18f7c657585326d94e43dc761403cf77
3
+ metadata.gz: 0667a895d64d87a7ec7078342b9449d185d5fc8c
4
+ data.tar.gz: a0b4d4ffc83573e180beb7c84e778d21c3250155
5
5
  SHA512:
6
- metadata.gz: b2725a63477308471676fbdabfd035fc83a469adede3ce56d49bd9020bf779ad53f6f3edec65e6ea744666dbe1df879116248f6ecfa8bdfa86dfdd4b393c2ca4
7
- data.tar.gz: 6a731db6a14a8b8c24394891fe3335082e5b664e44314d18324f1072e76a78f37ec3152e459dc0500b293e836958bb84a6e9733961a77fb87a4cda16ee043d56
6
+ metadata.gz: c08e2203df9f0a077a14373b3e7f6a7b1b8d5281a7e55754448befe31af46f1247d66a7251c402610214f029f95ad02ae62074b431a9aefbc03d62b4eb8a5dd9
7
+ data.tar.gz: 8233a2eab64a5fceb841d601904b570eb160de41a6ddeeba5ea80a669823d3da9b15d9e0b095e37d6b47e09ae43cd1261028d33f4037d56dea6c9f91fdcc37e1
data/README.md CHANGED
@@ -82,6 +82,23 @@ blaster.find_reciprocals
82
82
  blaster.find_secondaries
83
83
  ```
84
84
 
85
+ ### Output
86
+
87
+ The CRB-BLAST output file contains the following columns, taken from the BLAST output:
88
+
89
+ ```
90
+ query - the name of the transcript from the 'query' fasta file
91
+ target - the name of the transcript from the 'target' fasta file
92
+ id - the percent sequence identity
93
+ alnlen - the alignment length
94
+ evalue - the blast evalue
95
+ bitscore - the blast bitscore
96
+ qstart..qend - the coordinates of the alignment in the query from start to end
97
+ tstart..tend - the coordinates of the alignment in the target from start to end
98
+ qlen - the length of the query transcript
99
+ tlen - the length of the target transcript
100
+ ```
101
+
85
102
  ### Getting help
86
103
 
87
104
  Please use the issue tracker if you find bugs or have trouble running CRB-BLAST.
@@ -50,6 +50,9 @@ EOS
50
50
  opt :split,
51
51
  "split the fasta files into chunks and run multiple blast jobs and then"+
52
52
  " combine them."
53
+
54
+ opt :verbose,
55
+ "be verbose"
53
56
  end
54
57
 
55
58
  Trollop::die :query, "must exist" if !File.exist?(opts[:query])
@@ -60,12 +63,27 @@ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
60
63
  Bindeps.require gem_deps
61
64
 
62
65
  blaster = CRB_Blast::CRB_Blast.new(opts.query, opts.target)
66
+ print "Making blast databases..." if opts.verbose
63
67
  dbs = blaster.makedb
68
+ puts " Done" if opts.verbose
69
+
70
+ print "Blasting..." if opts.verbose
64
71
  run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
72
+ puts " Done" if opts.verbose
73
+
74
+ print "Loading..." if opts.verbose
65
75
  load = blaster.load_outputs
76
+ puts " Done" if opts.verbose
77
+
78
+ print "Finding reciprocals..." if opts.verbose
66
79
  recips = blaster.find_reciprocals
80
+ puts " Done" if opts.verbose
81
+
82
+ print "Fitting curve..." if opts.verbose
67
83
  secondaries = blaster.find_secondaries
84
+ puts " Done" if opts.verbose
68
85
 
86
+ print "Writing output..." if opts.verbose
69
87
  File.open("#{opts.output}", 'w') do |out|
70
88
  blaster.reciprocals.each_pair do |query_id, hits|
71
89
  hits.each do |hit|
@@ -73,3 +91,4 @@ File.open("#{opts.output}", 'w') do |out|
73
91
  end
74
92
  end
75
93
  end
94
+ puts " Done" if opts.verbose
@@ -24,6 +24,7 @@ module CRB_Blast
24
24
  attr_accessor :missed
25
25
  attr_accessor :target_is_prot, :query_is_prot
26
26
  attr_accessor :query_results, :target_results, :working_dir
27
+ attr_reader :reciprocal_hits
27
28
 
28
29
  def initialize query, target, output=nil
29
30
  raise IOError.new("File not found #{query}") if !File.exist?(query)
@@ -189,12 +190,22 @@ module CRB_Blast
189
190
  cmd1 << " -out #{@output1} -evalue #{evalue} "
190
191
  cmd1 << " -outfmt \"6 std qlen slen\" "
191
192
  cmd1 << " -max_target_seqs 50 "
193
+ if bin1=="blastn"
194
+ cmd1 << " -dust no "
195
+ elsif bin1=~/blastx/ or bin1=~/blastp/ or bin1=~/tblastn/
196
+ cmd1 << " -seg no "
197
+ end
192
198
  cmd1 << " -num_threads #{threads}"
193
199
 
194
200
  cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
195
201
  cmd2 << " -out #{@output2} -evalue #{evalue} "
196
202
  cmd2 << " -outfmt \"6 std qlen slen\" "
197
203
  cmd2 << " -max_target_seqs 50 "
204
+ if bin2=="blastn"
205
+ cmd2 << " -dust no "
206
+ elsif bin2=~/blastx/ or bin2=~/blastp/ or bin2=~/tblastn/
207
+ cmd2 << " -seg no "
208
+ end
198
209
  cmd2 << " -num_threads #{threads}"
199
210
  if !File.exist?("#{@output1}")
200
211
  blast1 = Cmd.new(cmd1)
@@ -432,9 +443,12 @@ module CRB_Blast
432
443
  else
433
444
  length_hash = Hash.new
434
445
  fitting = Hash.new
435
- @evalues.each do |h|
436
- length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
437
- length_hash[h[:length]] << h
446
+ File.open("evalues_data", "w") do |io|
447
+ @evalues.each do |h|
448
+ length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
449
+ length_hash[h[:length]] << h
450
+ io.write "#{h[:length]}\t#{h[:e]}\n"
451
+ end
438
452
  end
439
453
 
440
454
  (10..@longest).each do |centre|
@@ -453,9 +467,24 @@ module CRB_Blast
453
467
  end
454
468
  if count>0
455
469
  mean = e/count
456
- fitting[centre] = mean
470
+ if fitting[centre-1]
471
+ if fitting[centre-1] > mean # monotonic fitting
472
+ fitting[centre] = fitting[centre-1]
473
+ else
474
+ fitting[centre] = mean
475
+ end
476
+ else
477
+ fitting[centre] = mean
478
+ end
479
+ end
480
+ end
481
+ # output fitting data
482
+ File.open("fitting_data", "w") do |io|
483
+ fitting.each do |centre, mean|
484
+ io.write "#{centre}\t#{mean}\n"
457
485
  end
458
486
  end
487
+ #
459
488
  hits = 0
460
489
  @missed.each_pair do |id, list|
461
490
  list.each do |hit|
@@ -531,6 +560,8 @@ module CRB_Blast
531
560
  def write_output
532
561
  s=""
533
562
  unless @reciprocals.nil?
563
+ s << "query\ttarget\tid\talnlen\tevalue\tbitscore\t"
564
+ s << "qstart..qend\ttstart..tend\tqlen\ttlen\n"
534
565
  @reciprocals.each_pair do |query_id, hits|
535
566
  hits.each do |hit|
536
567
  s << "#{hit}\n"
@@ -3,7 +3,7 @@ module CRB_Blast
3
3
  module VERSION
4
4
  MAJOR = 0
5
5
  MINOR = 6
6
- PATCH = 6
6
+ PATCH = 8
7
7
  BUILD = nil
8
8
 
9
9
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crb-blast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.6
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Boursnell