transrate 1.0.0.beta1 → 1.0.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
@@ -11,7 +11,7 @@ module Transrate
11
11
  def initialize assembly
12
12
  @assembly = assembly
13
13
  @mapper = Snap.new
14
- @express = Express.new
14
+ @salmon = Salmon.new
15
15
  self.initial_values
16
16
 
17
17
  load_executables
@@ -19,7 +19,6 @@ module Transrate
19
19
  end
20
20
 
21
21
  def load_executables
22
- @bam_splitter = get_bin_path 'bam-split'
23
22
  @bam_reader = get_bin_path 'bam-read'
24
23
  end
25
24
 
@@ -27,7 +26,7 @@ module Transrate
27
26
  which_bin = Cmd.new("which #{bin}")
28
27
  which_bin.run
29
28
  if !which_bin.status.success?
30
- raise IOError.new("ReadMetrics: could not find #{bin} in path")
29
+ raise TransrateIOError.new("ReadMetrics: could not find #{bin} in path")
31
30
  end
32
31
  which_bin.stdout.split("\n").first
33
32
  end
@@ -35,10 +34,11 @@ module Transrate
35
34
  def run left, right, insertsize:200, insertsd:50, threads:8
36
35
  # check all read files exist
37
36
  [left, right].each do |readfile|
38
- raise IOError.new "Read file is nil" if readfile.nil?
37
+ raise TransrateIOError.new "Read file is nil" if readfile.nil?
39
38
  readfile.split(",").each do |file|
40
39
  unless File.exist? file
41
- raise IOError.new "ReadMetrics: read file does not exist: #{file}"
40
+ raise TransrateIOError.new "ReadMetrics: read file does not " +
41
+ "exist: #{file}"
42
42
  end
43
43
  end
44
44
  end
@@ -54,38 +54,18 @@ module Transrate
54
54
  threads: threads)
55
55
  @fragments = @mapper.read_count
56
56
 
57
- sorted_bam = "#{File.basename(bamfile, '.bam')}.merged.sorted.bam"
58
- merged_bam = "#{File.basename(bamfile, '.bam')}.merged.bam"
59
- assigned_bam = "hits.1.samp.bam"
60
- readsorted_bam = "#{File.basename(bamfile, '.bam')}.readsorted.bam"
61
- valid_bam = "#{File.basename(bamfile, '.bam')}.valid.bam"
62
- invalid_bam = "#{File.basename(bamfile, '.bam')}.invalid.bam"
57
+ assigned_bam = "postSample.bam"
58
+ final_bam = "#{File.basename(bamfile, '.bam')}.assigned.bam"
63
59
 
64
60
  # check for latest files first and create what is needed
65
- if !File.exist?(sorted_bam)
66
- if !File.exist?(merged_bam)
67
- if !File.exist?(assigned_bam)
68
- if !File.exist?(readsorted_bam)
69
- if !File.exist?(valid_bam)
70
- valid_bam, invalid_bam = split_bam bamfile
71
- end
72
- readsorted_bam = Samtools.readsort_bam(valid_bam)
73
- File.delete valid_bam
74
- end
75
- assigned_bam = assign_and_quantify readsorted_bam
76
- File.delete readsorted_bam
77
- end
78
- Samtools.merge_bam(invalid_bam, assigned_bam,
79
- merged_bam, threads=threads)
80
- File.delete invalid_bam
81
- File.delete assigned_bam
61
+ if !File.exist?(final_bam)
62
+ if !File.exist?(assigned_bam)
63
+ assigned_bam = assign_and_quantify bamfile
82
64
  end
83
- sorted_bam = Samtools.sort_bam(merged_bam, [4, threads].min)
84
- File.delete merged_bam
65
+ File.rename(assigned_bam, final_bam)
85
66
  end
86
-
87
67
  # analyse the final mappings
88
- analyse_read_mappings(sorted_bam, insertsize, insertsd, true)
68
+ analyse_read_mappings(final_bam, insertsize, insertsd, true)
89
69
 
90
70
  @has_run = true
91
71
  end
@@ -133,31 +113,12 @@ module Transrate
133
113
  read_length
134
114
  end
135
115
 
136
- def split_bam bamfile
137
- base = File.basename(bamfile, '.bam')
138
- valid = "#{base}.valid.bam"
139
- invalid = "#{base}.invalid.bam"
140
- if !File.exist? valid
141
- cmd = "#{@bam_splitter} #{bamfile}"
142
- splitter = Cmd.new cmd
143
- splitter.run
144
- if !splitter.status.success?
145
- raise StandardError.new "Couldn't split bam file: #{bamfile}" +
146
- "\n#{splitter.stdout}\n#{splitter.stderr}"
147
- end
148
- end
149
- if !File.exist? valid
150
- raise StandardError.new "Splitting failed to create valid bam: #{valid}"
151
- end
152
- [valid, invalid]
153
- end
154
-
155
116
  def assign_and_quantify bamfile
156
- express_bam = @express.run(@assembly, bamfile)
117
+ @salmon.run(@assembly, bamfile)
157
118
  end
158
119
 
159
- def analyse_expression express_output
160
- express_output.each_pair do |name, expr|
120
+ def analyse_expression salmon_output
121
+ salmon_output.each_pair do |name, expr|
161
122
  contig_name = Bio::FastaDefline.new(name.to_s).entry_id
162
123
  contig = @assembly[contig_name]
163
124
  if expr[:eff_len]==0
@@ -190,14 +151,14 @@ module Transrate
190
151
  end
191
152
  @bad = @fragments_mapped - @good
192
153
  else
193
- raise "couldn't find bamfile: #{bamfile}"
154
+ raise TransrateError.new "couldn't find bamfile: #{bamfile}"
194
155
  end
195
- express_results = "#{File.basename @assembly.file}_results.xprs"
156
+ salmon_results = "#{File.basename @assembly.file}_quant.sf"
196
157
 
197
- if File.exist?(express_results)
198
- analyse_expression(@express.load_expression(express_results))
158
+ if File.exist?(salmon_results)
159
+ analyse_expression(@salmon.load_expression(salmon_results))
199
160
  else
200
- abort "Can't find #{express_results}"
161
+ abort "Can't find #{salmon_results}"
201
162
  end
202
163
  @assembly.assembly.each_pair do |name, contig|
203
164
  @contigs_good += 1 if contig.score >= 0.5
@@ -228,7 +189,7 @@ module Transrate
228
189
  if !reader.status.success?
229
190
  msg = "Couldn't get information from bam file: #{bamfile}\n"
230
191
  msg << "#{reader.stdout}\n#{reader.stderr}"
231
- raise msg
192
+ raise TransrateError.new msg
232
193
  end
233
194
  end
234
195
  end
@@ -236,8 +197,7 @@ module Transrate
236
197
  def populate_contig_data row
237
198
  name = Bio::FastaDefline.new(row[:name].to_s).entry_id
238
199
  contig = @assembly[name]
239
- scale = 0.7
240
- contig.p_seq_true = (row[:p_seq_true] - scale) * (1.0 / (1 - scale))
200
+ contig.p_seq_true = row[:p_seq_true]
241
201
  contig.uncovered_bases = row[:bases_uncovered]
242
202
  @bases_uncovered += contig.uncovered_bases
243
203
  if row[:fragments_mapped] and row[:fragments_mapped] > 1
@@ -0,0 +1,67 @@
1
require 'shellwords'

module Transrate

  # Raised when the salmon executable cannot be found on the PATH, when a
  # salmon run exits unsuccessfully, or when a run finishes without leaving
  # the expected quantification file behind.
  class SalmonError < TransrateError
  end

  # Wrapper around the `salmon` command-line tool. It locates the executable,
  # runs `salmon quant` on a BAM file of alignments against an assembly, and
  # parses the resulting tab-separated quantification file.
  class Salmon

    # Name of the quantification file that #run expects to find in the
    # working directory after salmon has finished.
    QUANT_FILE = 'quant.sf'

    # File name returned by #run.
    # NOTE(review): presumably the BAM salmon writes because of the
    # --sampleOut flag -- confirm against the salmon version in use.
    SAMPLED_BAM = 'postSample.bam'

    # Locate the salmon executable using `which`.
    #
    # Raises SalmonError if `which salmon` does not exit successfully.
    def initialize
      which = Cmd.new('which salmon')
      which.run
      unless which.status.success?
        raise SalmonError.new("could not find salmon in the path")
      end
      # `which` may print several lines; keep only the first one
      @salmon = which.stdout.split("\n").first
    end

    # Quantify the alignments in +bamfile+ against +assembly+.
    #
    # assembly - an Assembly (its #file is used) or a path to a FASTA file
    # bamfile  - path to the BAM file of alignments
    # threads  - number of threads passed to salmon (default 8)
    #
    # The quantification file is renamed to "<assembly basename>_quant.sf".
    # If a file with that name already exists, salmon is not run again.
    #
    # Returns the String 'postSample.bam'.
    # Raises SalmonError if salmon exits unsuccessfully or does not leave a
    # quant.sf file in the working directory.
    def run assembly, bamfile, threads=8
      assembly = assembly.file if assembly.is_a? Assembly
      @fin_output = "#{File.basename assembly}_#{QUANT_FILE}"
      unless File.exist? @fin_output
        salmon = Cmd.new build_command(assembly, bamfile, threads)
        salmon.run
        unless salmon.status.success?
          logger.error salmon.stderr
          raise SalmonError.new("Salmon failed")
        end
        # Fail with a SalmonError rather than a bare Errno::ENOENT from
        # File.rename when salmon reported success but wrote no output.
        unless File.exist? QUANT_FILE
          raise SalmonError.new("Salmon did not produce #{QUANT_FILE}")
        end
        File.rename(QUANT_FILE, @fin_output)
      end
      SAMPLED_BAM
    end

    # Build the `salmon quant` command line as a single String.
    #
    # assembly - path to the FASTA file passed as --targets
    # bamfile  - path to the BAM file passed as --alignments
    # threads  - value passed as --threads (default 4)
    #
    # Both paths are shell-escaped so that names containing spaces or shell
    # metacharacters reach salmon as single arguments.
    def build_command assembly, bamfile, threads=4
      [
        "#{@salmon} quant",
        "--libType IU",
        "--alignments #{Shellwords.escape(bamfile.to_s)}",
        "--targets #{Shellwords.escape(assembly.to_s)}",
        "--threads #{threads}",
        "--sampleOut",
        "--sampleUnaligned", # thanks Rob!
        "--output ."
      ].join(" ")
    end

    # Parse a tab-separated salmon quantification file.
    #
    # file - path to the quantification file
    #
    # Lines starting with '#' and empty lines are ignored. For every other
    # line, column 0 is the target name, column 1 is read as :eff_len
    # (Integer), column 2 as :tpm (Float) and column 4 as :eff_count (Float).
    # Missing columns yield 0 / 0.0, following String#to_i / NilClass#to_i
    # semantics.
    #
    # Returns a Hash mapping target name => { :eff_len, :eff_count, :tpm }.
    def load_expression file
      expression = {}
      # File.foreach closes the file once iteration is finished
      File.foreach(file) do |line|
        next if line.start_with?('#')
        fields = line.chomp.split("\t")
        next if fields.empty?
        expression[fields[0]] = {
          :eff_len => fields[1].to_i,
          :eff_count => fields[4].to_f,
          :tpm => fields[2].to_f
        }
      end
      expression
    end

  end

end
@@ -1,6 +1,6 @@
1
1
  module Transrate
2
2
 
3
- class SnapError < StandardError
3
+ class SnapError < TransrateError
4
4
  end
5
5
 
6
6
  class Snap
@@ -8,7 +8,7 @@ module Transrate
8
8
  require 'fix-trinity-output'
9
9
  require 'bio'
10
10
 
11
- attr_reader :index_name, :sam, :read_count
11
+ attr_reader :index_name, :bam, :read_count
12
12
 
13
13
  def initialize
14
14
  which_snap = Cmd.new('which snap')
@@ -27,10 +27,6 @@ module Transrate
27
27
  l.split(",").zip(r.split(",")).each do |left, right|
28
28
  cmd << " #{left} #{right}"
29
29
  end
30
- # NOTE: do NOT turn on the -so flag (sort bam output)
31
- # it violates the basic assumption of eXpress's streaming
32
- # algorithm: that the fragments are observed in approximately
33
- # random order.
34
30
  cmd << " -o #{@bam}"
35
31
  cmd << " -s 0 1000" # min and max distance between paired-read starts
36
32
  cmd << " -H 300000" # max seed hits to consider in paired mode
@@ -39,7 +35,7 @@ module Transrate
39
35
  cmd << " -t #{threads}"
40
36
  cmd << " -b" # bind threads to cores
41
37
  cmd << " -M" # format cigar string
42
- cmd << " -D 5" # edit distance to search for mapq calculation
38
+ cmd << " -D 5" # extra edit distance to search. needed for -om
43
39
  cmd << " -om 5" # Output multiple alignments. extra edit distance
44
40
  cmd << " -omax 10" # max alignments per pair/read
45
41
  cmd
@@ -63,6 +59,7 @@ module Transrate
63
59
  save_readcount runner.stdout
64
60
  unless runner.status.success?
65
61
  if runner.stderr=~/Unmatched\sread\sIDs/
62
+ logger.warn runner.stderr
66
63
  logger.warn "Unmatched read IDs. Fixing input files..."
67
64
  remap_reads(left, right, threads)
68
65
  else
@@ -134,7 +131,6 @@ module Transrate
134
131
 
135
132
  def build_index file, threads
136
133
  @index_name = File.basename(file, File.extname(file))
137
- file = check_ambiguous(file)
138
134
  unless Dir.exists?(@index_name)
139
135
  cmd = "#{@snap} index #{file} #{@index_name}"
140
136
  cmd << " -s 23"
@@ -151,30 +147,6 @@ module Transrate
151
147
  @index_built = true
152
148
  end
153
149
 
154
- def check_ambiguous file
155
-
156
- ref = Bio::FastaFormat.open(file)
157
- ambiguous = false
158
- fixed = ""
159
- ref.each do |entry|
160
- seq = entry.seq
161
- if seq =~ /[RYSWKMBDHV]/
162
- seq = seq.gsub(/[RYSWKMBDHV]/, "N")
163
- ambiguous = true
164
- end
165
- fixed << ">#{entry.definition}\n#{seq}\n"
166
- end
167
- ref.close
168
- if ambiguous
169
- logger.warn "squelching ambiguous nucleotides"
170
- file = "#{File.basename(file, File.extname(file))}.fixed.fasta"
171
- File.open(file, "w") do |out|
172
- out.write fixed
173
- end
174
- end
175
- return file
176
- end
177
-
178
150
  end # Snap
179
151
 
180
152
  end # Transrate
@@ -32,7 +32,7 @@ module Transrate
32
32
  end
33
33
  @read_metrics = ReadMetrics.new @assembly
34
34
  else
35
- raise RuntimeError.new("assembly is nil")
35
+ raise TransrateError.new("assembly is nil")
36
36
  end
37
37
 
38
38
  if reference
@@ -11,7 +11,7 @@ module Transrate
11
11
  MAJOR = 1
12
12
  MINOR = 0
13
13
  PATCH = 0
14
- BUILD = 'beta1'
14
+ BUILD = 'beta2'
15
15
 
16
16
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
17
17
  end
data/lib/transrate.rb CHANGED
@@ -3,25 +3,15 @@ require 'rbconfig'
3
3
  require 'yell'
4
4
  RbConfig::CONFIG['CFLAGS'] = ''
5
5
 
6
- require 'transrate/transrater'
7
- require 'transrate/version'
8
- require 'transrate/contig'
9
- require 'transrate/assembly'
10
- require 'transrate/snap'
11
- require 'transrate/score_optimiser'
12
- require 'transrate/express'
13
- require 'transrate/read_metrics'
14
- require 'transrate/comparative_metrics'
15
- require 'transrate/contig_metrics'
16
- require 'transrate/samtools'
17
- require 'transrate/cmd'
18
- require 'transrate/sam_checker'
19
- require 'transrate/transrate.so'
20
-
21
6
  # Transrate is a comprehensive transcriptome assembly
22
7
  # quality assessment tool.
23
8
  module Transrate
24
9
 
10
+ # Our own set of errors to allow nice custom error handling
11
+ class TransrateError < StandardError; end
12
+ class TransrateIOError < TransrateError; end
13
+ class TransrateArgError < TransrateError; end
14
+
25
15
  # Create the universal logger and include it in Object
26
16
  # making the logger object available everywhere
27
17
  format = Yell::Formatter.new("[%5L] %d : %m", "%Y-%m-%d %H:%M:%S")
@@ -35,3 +25,16 @@ module Transrate
35
25
  Object.send :include, Yell::Loggable
36
26
 
37
27
  end # Transrate
28
+
29
+ require 'transrate/transrater'
30
+ require 'transrate/version'
31
+ require 'transrate/contig'
32
+ require 'transrate/assembly'
33
+ require 'transrate/snap'
34
+ require 'transrate/score_optimiser'
35
+ require 'transrate/salmon'
36
+ require 'transrate/read_metrics'
37
+ require 'transrate/comparative_metrics'
38
+ require 'transrate/contig_metrics'
39
+ require 'transrate/cmd'
40
+ require 'transrate/transrate.so'