transrate 1.0.0.beta1 → 1.0.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -11,7 +11,7 @@ module Transrate
11
11
  def initialize assembly
12
12
  @assembly = assembly
13
13
  @mapper = Snap.new
14
- @express = Express.new
14
+ @salmon = Salmon.new
15
15
  self.initial_values
16
16
 
17
17
  load_executables
@@ -19,7 +19,6 @@ module Transrate
19
19
  end
20
20
 
21
21
  def load_executables
22
- @bam_splitter = get_bin_path 'bam-split'
23
22
  @bam_reader = get_bin_path 'bam-read'
24
23
  end
25
24
 
@@ -27,7 +26,7 @@ module Transrate
27
26
  which_bin = Cmd.new("which #{bin}")
28
27
  which_bin.run
29
28
  if !which_bin.status.success?
30
- raise IOError.new("ReadMetrics: could not find #{bin} in path")
29
+ raise TransrateIOError.new("ReadMetrics: could not find #{bin} in path")
31
30
  end
32
31
  which_bin.stdout.split("\n").first
33
32
  end
@@ -35,10 +34,11 @@ module Transrate
35
34
  def run left, right, insertsize:200, insertsd:50, threads:8
36
35
  # check all read files exist
37
36
  [left, right].each do |readfile|
38
- raise IOError.new "Read file is nil" if readfile.nil?
37
+ raise TransrateIOError.new "Read file is nil" if readfile.nil?
39
38
  readfile.split(",").each do |file|
40
39
  unless File.exist? file
41
- raise IOError.new "ReadMetrics: read file does not exist: #{file}"
40
+ raise TransrateIOError.new "ReadMetrics: read file does not " +
41
+ "exist: #{file}"
42
42
  end
43
43
  end
44
44
  end
@@ -54,38 +54,18 @@ module Transrate
54
54
  threads: threads)
55
55
  @fragments = @mapper.read_count
56
56
 
57
- sorted_bam = "#{File.basename(bamfile, '.bam')}.merged.sorted.bam"
58
- merged_bam = "#{File.basename(bamfile, '.bam')}.merged.bam"
59
- assigned_bam = "hits.1.samp.bam"
60
- readsorted_bam = "#{File.basename(bamfile, '.bam')}.readsorted.bam"
61
- valid_bam = "#{File.basename(bamfile, '.bam')}.valid.bam"
62
- invalid_bam = "#{File.basename(bamfile, '.bam')}.invalid.bam"
57
+ assigned_bam = "postSample.bam"
58
+ final_bam = "#{File.basename(bamfile, '.bam')}.assigned.bam"
63
59
 
64
60
  # check for latest files first and create what is needed
65
- if !File.exist?(sorted_bam)
66
- if !File.exist?(merged_bam)
67
- if !File.exist?(assigned_bam)
68
- if !File.exist?(readsorted_bam)
69
- if !File.exist?(valid_bam)
70
- valid_bam, invalid_bam = split_bam bamfile
71
- end
72
- readsorted_bam = Samtools.readsort_bam(valid_bam)
73
- File.delete valid_bam
74
- end
75
- assigned_bam = assign_and_quantify readsorted_bam
76
- File.delete readsorted_bam
77
- end
78
- Samtools.merge_bam(invalid_bam, assigned_bam,
79
- merged_bam, threads=threads)
80
- File.delete invalid_bam
81
- File.delete assigned_bam
61
+ if !File.exist?(final_bam)
62
+ if !File.exist?(assigned_bam)
63
+ assigned_bam = assign_and_quantify bamfile
82
64
  end
83
- sorted_bam = Samtools.sort_bam(merged_bam, [4, threads].min)
84
- File.delete merged_bam
65
+ File.rename(assigned_bam, final_bam)
85
66
  end
86
-
87
67
  # analyse the final mappings
88
- analyse_read_mappings(sorted_bam, insertsize, insertsd, true)
68
+ analyse_read_mappings(final_bam, insertsize, insertsd, true)
89
69
 
90
70
  @has_run = true
91
71
  end
@@ -133,31 +113,12 @@ module Transrate
133
113
  read_length
134
114
  end
135
115
 
136
- def split_bam bamfile
137
- base = File.basename(bamfile, '.bam')
138
- valid = "#{base}.valid.bam"
139
- invalid = "#{base}.invalid.bam"
140
- if !File.exist? valid
141
- cmd = "#{@bam_splitter} #{bamfile}"
142
- splitter = Cmd.new cmd
143
- splitter.run
144
- if !splitter.status.success?
145
- raise StandardError.new "Couldn't split bam file: #{bamfile}" +
146
- "\n#{splitter.stdout}\n#{splitter.stderr}"
147
- end
148
- end
149
- if !File.exist? valid
150
- raise StandardError.new "Splitting failed to create valid bam: #{valid}"
151
- end
152
- [valid, invalid]
153
- end
154
-
155
116
  def assign_and_quantify bamfile
156
- express_bam = @express.run(@assembly, bamfile)
117
+ @salmon.run(@assembly, bamfile)
157
118
  end
158
119
 
159
- def analyse_expression express_output
160
- express_output.each_pair do |name, expr|
120
+ def analyse_expression salmon_output
121
+ salmon_output.each_pair do |name, expr|
161
122
  contig_name = Bio::FastaDefline.new(name.to_s).entry_id
162
123
  contig = @assembly[contig_name]
163
124
  if expr[:eff_len]==0
@@ -190,14 +151,14 @@ module Transrate
190
151
  end
191
152
  @bad = @fragments_mapped - @good
192
153
  else
193
- raise "couldn't find bamfile: #{bamfile}"
154
+ raise TransrateError.new "couldn't find bamfile: #{bamfile}"
194
155
  end
195
- express_results = "#{File.basename @assembly.file}_results.xprs"
156
+ salmon_results = "#{File.basename @assembly.file}_quant.sf"
196
157
 
197
- if File.exist?(express_results)
198
- analyse_expression(@express.load_expression(express_results))
158
+ if File.exist?(salmon_results)
159
+ analyse_expression(@salmon.load_expression(salmon_results))
199
160
  else
200
- abort "Can't find #{express_results}"
161
+ abort "Can't find #{salmon_results}"
201
162
  end
202
163
  @assembly.assembly.each_pair do |name, contig|
203
164
  @contigs_good += 1 if contig.score >= 0.5
@@ -228,7 +189,7 @@ module Transrate
228
189
  if !reader.status.success?
229
190
  msg = "Couldn't get information from bam file: #{bamfile}\n"
230
191
  msg << "#{reader.stdout}\n#{reader.stderr}"
231
- raise msg
192
+ raise TransrateError.new msg
232
193
  end
233
194
  end
234
195
  end
@@ -236,8 +197,7 @@ module Transrate
236
197
  def populate_contig_data row
237
198
  name = Bio::FastaDefline.new(row[:name].to_s).entry_id
238
199
  contig = @assembly[name]
239
- scale = 0.7
240
- contig.p_seq_true = (row[:p_seq_true] - scale) * (1.0 / (1 - scale))
200
+ contig.p_seq_true = row[:p_seq_true]
241
201
  contig.uncovered_bases = row[:bases_uncovered]
242
202
  @bases_uncovered += contig.uncovered_bases
243
203
  if row[:fragments_mapped] and row[:fragments_mapped] > 1
@@ -0,0 +1,67 @@
1
module Transrate

  # Raised when the salmon executable cannot be found or a salmon run
  # exits unsuccessfully.
  class SalmonError < TransrateError
  end

  # Wrapper around the `salmon` command-line tool: locates the binary,
  # builds and runs a `salmon quant` command on a bam file, and parses
  # the resulting tab-separated quantification file.
  class Salmon

    # Locate the salmon binary with `which` and remember its path.
    #
    # @raise [SalmonError] if `which salmon` does not exit successfully
    def initialize
      which = Cmd.new('which salmon')
      which.run
      if !which.status.success?
        raise SalmonError.new("could not find salmon in the path")
      end
      @salmon = which.stdout.split("\n").first
    end

    # Run `salmon quant` for an assembly and a bam file, unless the
    # renamed output file for this assembly already exists in the
    # current directory.
    #
    # @param assembly [Assembly, String] an Assembly (its #file is used)
    #   or the path to the assembly file
    # @param bamfile [String] path to the alignments passed to salmon
    # @param threads [Integer] value passed to salmon's --threads option
    # @return [String] the literal file name 'postSample.bam'
    #   (presumably the sampled alignments written because of
    #   --sampleOut; verify against the installed salmon version)
    # @raise [SalmonError] if the salmon command exits unsuccessfully
    def run assembly, bamfile, threads=8
      assembly = assembly.file if assembly.is_a? Assembly
      output = "quant.sf"
      # prefix the output with the assembly name so results from
      # different assemblies in the same directory do not collide
      @fin_output = "#{File.basename assembly}_#{output}"
      unless File.exist? @fin_output
        salmon = Cmd.new build_command(assembly, bamfile, threads)
        salmon.run
        unless salmon.status.success?
          logger.error salmon.stderr
          raise SalmonError.new("Salmon failed")
        end
        File.rename(output, @fin_output)
      end
      'postSample.bam'
    end

    # Build the `salmon quant` command line.
    #
    # @param assembly [String] path given to --targets
    # @param bamfile [String] path given to --alignments
    # @param threads [Integer] value given to --threads
    # @return [String] the complete command
    def build_command assembly, bamfile, threads=4
      cmd = "#{@salmon} quant"
      cmd << " --libType IU"
      cmd << " --alignments #{bamfile}"
      cmd << " --targets #{assembly}"
      cmd << " --threads #{threads}"
      cmd << " --sampleOut"
      cmd << " --sampleUnaligned" # thanks Rob!
      cmd << " --output ."
      cmd
    end

    # Parse a tab-separated salmon quantification file.
    #
    # Lines starting with '#' and blank lines are skipped. For every
    # other line, column 0 is used as the target name, column 1 as the
    # effective length, column 2 as the TPM and column 4 as the
    # effective count.
    # NOTE(review): the column meanings are the names this code gives
    # them; confirm against the quant.sf layout of the salmon version
    # in use.
    #
    # @param file [String] path to the quantification file
    # @return [Hash{String => Hash}] target name mapped to a hash with
    #   the keys :eff_len (Integer), :eff_count (Float) and :tpm (Float)
    def load_expression file
      expression = {}
      # File.foreach closes the file when iteration finishes, unlike
      # File.open(file).each which leaves the handle open
      File.foreach(file) do |line|
        next unless line !~ /^#/
        fields = line.chomp.split("\t")
        # a blank line would otherwise be stored under a nil key
        next if fields.empty?
        target = fields[0]
        effective_length = fields[1]
        effective_count = fields[4]
        tpm = fields[2]
        expression[target] = {
          :eff_len => effective_length.to_i,
          :eff_count => effective_count.to_f,
          :tpm => tpm.to_f
        }
      end
      expression
    end

  end

end
@@ -1,6 +1,6 @@
1
1
  module Transrate
2
2
 
3
- class SnapError < StandardError
3
+ class SnapError < TransrateError
4
4
  end
5
5
 
6
6
  class Snap
@@ -8,7 +8,7 @@ module Transrate
8
8
  require 'fix-trinity-output'
9
9
  require 'bio'
10
10
 
11
- attr_reader :index_name, :sam, :read_count
11
+ attr_reader :index_name, :bam, :read_count
12
12
 
13
13
  def initialize
14
14
  which_snap = Cmd.new('which snap')
@@ -27,10 +27,6 @@ module Transrate
27
27
  l.split(",").zip(r.split(",")).each do |left, right|
28
28
  cmd << " #{left} #{right}"
29
29
  end
30
- # NOTE: do NOT turn on the -so flag (sort bam output)
31
- # it violates the basic assumption of eXpress's streaming
32
- # algorithm: that the fragments are observed in approximately
33
- # random order.
34
30
  cmd << " -o #{@bam}"
35
31
  cmd << " -s 0 1000" # min and max distance between paired-read starts
36
32
  cmd << " -H 300000" # max seed hits to consider in paired mode
@@ -39,7 +35,7 @@ module Transrate
39
35
  cmd << " -t #{threads}"
40
36
  cmd << " -b" # bind threads to cores
41
37
  cmd << " -M" # format cigar string
42
- cmd << " -D 5" # edit distance to search for mapq calculation
38
+ cmd << " -D 5" # extra edit distance to search. needed for -om
43
39
  cmd << " -om 5" # Output multiple alignments. extra edit distance
44
40
  cmd << " -omax 10" # max alignments per pair/read
45
41
  cmd
@@ -63,6 +59,7 @@ module Transrate
63
59
  save_readcount runner.stdout
64
60
  unless runner.status.success?
65
61
  if runner.stderr=~/Unmatched\sread\sIDs/
62
+ logger.warn runner.stderr
66
63
  logger.warn "Unmatched read IDs. Fixing input files..."
67
64
  remap_reads(left, right, threads)
68
65
  else
@@ -134,7 +131,6 @@ module Transrate
134
131
 
135
132
  def build_index file, threads
136
133
  @index_name = File.basename(file, File.extname(file))
137
- file = check_ambiguous(file)
138
134
  unless Dir.exists?(@index_name)
139
135
  cmd = "#{@snap} index #{file} #{@index_name}"
140
136
  cmd << " -s 23"
@@ -151,30 +147,6 @@ module Transrate
151
147
  @index_built = true
152
148
  end
153
149
 
154
- def check_ambiguous file
155
-
156
- ref = Bio::FastaFormat.open(file)
157
- ambiguous = false
158
- fixed = ""
159
- ref.each do |entry|
160
- seq = entry.seq
161
- if seq =~ /[RYSWKMBDHV]/
162
- seq = seq.gsub(/[RYSWKMBDHV]/, "N")
163
- ambiguous = true
164
- end
165
- fixed << ">#{entry.definition}\n#{seq}\n"
166
- end
167
- ref.close
168
- if ambiguous
169
- logger.warn "squelching ambiguous nucleotides"
170
- file = "#{File.basename(file, File.extname(file))}.fixed.fasta"
171
- File.open(file, "w") do |out|
172
- out.write fixed
173
- end
174
- end
175
- return file
176
- end
177
-
178
150
  end # Snap
179
151
 
180
152
  end # Transrate
@@ -32,7 +32,7 @@ module Transrate
32
32
  end
33
33
  @read_metrics = ReadMetrics.new @assembly
34
34
  else
35
- raise RuntimeError.new("assembly is nil")
35
+ raise TransrateError.new("assembly is nil")
36
36
  end
37
37
 
38
38
  if reference
@@ -11,7 +11,7 @@ module Transrate
11
11
  MAJOR = 1
12
12
  MINOR = 0
13
13
  PATCH = 0
14
- BUILD = 'beta1'
14
+ BUILD = 'beta2'
15
15
 
16
16
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
17
17
  end
data/lib/transrate.rb CHANGED
@@ -3,25 +3,15 @@ require 'rbconfig'
3
3
  require 'yell'
4
4
  RbConfig::CONFIG['CFLAGS'] = ''
5
5
 
6
- require 'transrate/transrater'
7
- require 'transrate/version'
8
- require 'transrate/contig'
9
- require 'transrate/assembly'
10
- require 'transrate/snap'
11
- require 'transrate/score_optimiser'
12
- require 'transrate/express'
13
- require 'transrate/read_metrics'
14
- require 'transrate/comparative_metrics'
15
- require 'transrate/contig_metrics'
16
- require 'transrate/samtools'
17
- require 'transrate/cmd'
18
- require 'transrate/sam_checker'
19
- require 'transrate/transrate.so'
20
-
21
6
  # Transrate is a comprehensive transcriptome assembly
22
7
  # quality assessment tool.
23
8
  module Transrate
24
9
 
10
+ # Our own set of errors to allow nice custom error handling
11
+ class TransrateError < StandardError; end
12
+ class TransrateIOError < TransrateError; end
13
+ class TransrateArgError < TransrateError; end
14
+
25
15
  # Create the universal logger and include it in Object
26
16
  # making the logger object available everywhere
27
17
  format = Yell::Formatter.new("[%5L] %d : %m", "%Y-%m-%d %H:%M:%S")
@@ -35,3 +25,16 @@ module Transrate
35
25
  Object.send :include, Yell::Loggable
36
26
 
37
27
  end # Transrate
28
+
29
+ require 'transrate/transrater'
30
+ require 'transrate/version'
31
+ require 'transrate/contig'
32
+ require 'transrate/assembly'
33
+ require 'transrate/snap'
34
+ require 'transrate/score_optimiser'
35
+ require 'transrate/salmon'
36
+ require 'transrate/read_metrics'
37
+ require 'transrate/comparative_metrics'
38
+ require 'transrate/contig_metrics'
39
+ require 'transrate/cmd'
40
+ require 'transrate/transrate.so'