crb-blast 0.6.6 → 0.6.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 275e3d2996f91648e9f046cd7a08c4c663243765
4
- data.tar.gz: 4de455ec18f7c657585326d94e43dc761403cf77
3
+ metadata.gz: 0667a895d64d87a7ec7078342b9449d185d5fc8c
4
+ data.tar.gz: a0b4d4ffc83573e180beb7c84e778d21c3250155
5
5
  SHA512:
6
- metadata.gz: b2725a63477308471676fbdabfd035fc83a469adede3ce56d49bd9020bf779ad53f6f3edec65e6ea744666dbe1df879116248f6ecfa8bdfa86dfdd4b393c2ca4
7
- data.tar.gz: 6a731db6a14a8b8c24394891fe3335082e5b664e44314d18324f1072e76a78f37ec3152e459dc0500b293e836958bb84a6e9733961a77fb87a4cda16ee043d56
6
+ metadata.gz: c08e2203df9f0a077a14373b3e7f6a7b1b8d5281a7e55754448befe31af46f1247d66a7251c402610214f029f95ad02ae62074b431a9aefbc03d62b4eb8a5dd9
7
+ data.tar.gz: 8233a2eab64a5fceb841d601904b570eb160de41a6ddeeba5ea80a669823d3da9b15d9e0b095e37d6b47e09ae43cd1261028d33f4037d56dea6c9f91fdcc37e1
data/README.md CHANGED
@@ -82,6 +82,23 @@ blaster.find_reciprocals
82
82
  blaster.find_secondaries
83
83
  ```
84
84
 
85
+ ### Output
86
+
87
+ The CRB-BLAST output file contains the following columns, which are taken from the BLAST output:
88
+
89
+ ```
90
+ query - the name of the transcript from the 'query' fasta file
91
+ target - the name of the transcript from the 'target' fasta file
92
+ id - the percent sequence identity
93
+ alnlen - the alignment length
94
+ evalue - the blast evalue
95
+ bitscore - the blast bitscore
96
+ qstart..qend - the coordinates of the alignment in the query from start to end
97
+ tstart..tend - the coordinates of the alignment in the target from start to end
98
+ qlen - the length of the query transcript
99
+ tlen - the length of the target transcript
100
+ ```
101
+
85
102
  ### Getting help
86
103
 
87
104
  Please use the issue tracker if you find bugs or have trouble running CRB-BLAST.
@@ -50,6 +50,9 @@ EOS
50
50
  opt :split,
51
51
  "split the fasta files into chunks and run multiple blast jobs and then"+
52
52
  " combine them."
53
+
54
+ opt :verbose,
55
+ "be verbose"
53
56
  end
54
57
 
55
58
  Trollop::die :query, "must exist" if !File.exist?(opts[:query])
@@ -60,12 +63,27 @@ gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
60
63
  Bindeps.require gem_deps
61
64
 
62
65
  blaster = CRB_Blast::CRB_Blast.new(opts.query, opts.target)
66
+ print "Making blast databases..." if opts.verbose
63
67
  dbs = blaster.makedb
68
+ puts " Done" if opts.verbose
69
+
70
+ print "Blasting..." if opts.verbose
64
71
  run = blaster.run_blast(opts.evalue, opts.threads, opts.split)
72
+ puts " Done" if opts.verbose
73
+
74
+ print "Loading..." if opts.verbose
65
75
  load = blaster.load_outputs
76
+ puts " Done" if opts.verbose
77
+
78
+ print "Finding reciprocals..." if opts.verbose
66
79
  recips = blaster.find_reciprocals
80
+ puts " Done" if opts.verbose
81
+
82
+ print "Fitting curve..." if opts.verbose
67
83
  secondaries = blaster.find_secondaries
84
+ puts " Done" if opts.verbose
68
85
 
86
+ print "Writing output..." if opts.verbose
69
87
  File.open("#{opts.output}", 'w') do |out|
70
88
  blaster.reciprocals.each_pair do |query_id, hits|
71
89
  hits.each do |hit|
@@ -73,3 +91,4 @@ File.open("#{opts.output}", 'w') do |out|
73
91
  end
74
92
  end
75
93
  end
94
+ puts " Done" if opts.verbose
@@ -24,6 +24,7 @@ module CRB_Blast
24
24
  attr_accessor :missed
25
25
  attr_accessor :target_is_prot, :query_is_prot
26
26
  attr_accessor :query_results, :target_results, :working_dir
27
+ attr_reader :reciprocal_hits
27
28
 
28
29
  def initialize query, target, output=nil
29
30
  raise IOError.new("File not found #{query}") if !File.exist?(query)
@@ -189,12 +190,22 @@ module CRB_Blast
189
190
  cmd1 << " -out #{@output1} -evalue #{evalue} "
190
191
  cmd1 << " -outfmt \"6 std qlen slen\" "
191
192
  cmd1 << " -max_target_seqs 50 "
193
+ if bin1=="blastn"
194
+ cmd1 << " -dust no "
195
+ elsif bin1=~/blastx/ or bin1=~/blastp/ or bin1=~/tblastn/
196
+ cmd1 << " -seg no "
197
+ end
192
198
  cmd1 << " -num_threads #{threads}"
193
199
 
194
200
  cmd2 = "#{bin2} -query #{@target} -db #{@working_dir}/#{@query_name} "
195
201
  cmd2 << " -out #{@output2} -evalue #{evalue} "
196
202
  cmd2 << " -outfmt \"6 std qlen slen\" "
197
203
  cmd2 << " -max_target_seqs 50 "
204
+ if bin2=="blastn"
205
+ cmd2 << " -dust no "
206
+ elsif bin2=~/blastx/ or bin2=~/blastp/ or bin2=~/tblastn/
207
+ cmd2 << " -seg no "
208
+ end
198
209
  cmd2 << " -num_threads #{threads}"
199
210
  if !File.exist?("#{@output1}")
200
211
  blast1 = Cmd.new(cmd1)
@@ -432,9 +443,12 @@ module CRB_Blast
432
443
  else
433
444
  length_hash = Hash.new
434
445
  fitting = Hash.new
435
- @evalues.each do |h|
436
- length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
437
- length_hash[h[:length]] << h
446
+ File.open("evalues_data", "w") do |io|
447
+ @evalues.each do |h|
448
+ length_hash[h[:length]] = [] if !length_hash.key?(h[:length])
449
+ length_hash[h[:length]] << h
450
+ io.write "#{h[:length]}\t#{h[:e]}\n"
451
+ end
438
452
  end
439
453
 
440
454
  (10..@longest).each do |centre|
@@ -453,9 +467,24 @@ module CRB_Blast
453
467
  end
454
468
  if count>0
455
469
  mean = e/count
456
- fitting[centre] = mean
470
+ if fitting[centre-1]
471
+ if fitting[centre-1] > mean # monotonic fitting
472
+ fitting[centre] = fitting[centre-1]
473
+ else
474
+ fitting[centre] = mean
475
+ end
476
+ else
477
+ fitting[centre] = mean
478
+ end
479
+ end
480
+ end
481
+ # output fitting data
482
+ File.open("fitting_data", "w") do |io|
483
+ fitting.each do |centre, mean|
484
+ io.write "#{centre}\t#{mean}\n"
457
485
  end
458
486
  end
487
+ #
459
488
  hits = 0
460
489
  @missed.each_pair do |id, list|
461
490
  list.each do |hit|
@@ -531,6 +560,8 @@ module CRB_Blast
531
560
  def write_output
532
561
  s=""
533
562
  unless @reciprocals.nil?
563
+ s << "query\ttarget\tid\talnlen\tevalue\tbitscore\t"
564
+ s << "qstart..qend\ttstart..tend\tqlen\ttlen\n"
534
565
  @reciprocals.each_pair do |query_id, hits|
535
566
  hits.each do |hit|
536
567
  s << "#{hit}\n"
@@ -3,7 +3,7 @@ module CRB_Blast
3
3
  module VERSION
4
4
  MAJOR = 0
5
5
  MINOR = 6
6
- PATCH = 6
6
+ PATCH = 8
7
7
  BUILD = nil
8
8
 
9
9
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: crb-blast
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.6
4
+ version: 0.6.8
5
5
  platform: ruby
6
6
  authors:
7
7
  - Chris Boursnell