transrate 1.0.0.beta1 → 1.0.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,102 +0,0 @@
1
-
2
module Transrate

  # Raised when the eXpress binary cannot be located.
  class ExpressError < StandardError
  end

  # Wrapper around the eXpress command-line tool: finds the binary, builds
  # and runs the command against an assembly and a BAM file, and parses the
  # resulting TSV.
  class Express

    require 'ostruct'
    require 'shellwords'

    # Name the results file was given by the most recent call to #run.
    attr_reader :fin_output

    # Locate the express binary using `which`.
    #
    # Raises ExpressError when `which express` does not exit successfully.
    def initialize
      which = Cmd.new('which express')
      which.run
      unless which.status.success?
        raise ExpressError.new("could not find express in the path")
      end
      @express = which.stdout.split("\n").first
    end

    # Run eXpress in the current directory unless its (renamed) results file
    # is already present.
    #
    # assembly - path to the assembly, or an Assembly (its #file is used)
    # bamfile  - path to the BAM file of alignments
    #
    # If the first attempt fails, the BAM file is cleaned with
    # #fix_problem_snap_output and eXpress is run once more; a second
    # failure aborts the process.
    #
    # Sets @fin_output to "<assembly basename>_results.xprs" and returns the
    # file name 'hits.1.samp.bam' (presumably the sampled alignments written
    # because of --output-align-samp -- TODO confirm).
    def run assembly, bamfile
      assembly = assembly.file if assembly.is_a? Assembly

      ex_output = 'results.xprs'
      sampled_bam = 'hits.1.samp.bam'
      @fin_output = "#{File.basename assembly}_#{ex_output}"

      unless File.exist? @fin_output
        runner = Cmd.new build_command(assembly, bamfile)
        runner.run
        unless runner.status.success?
          logger.warn "express failed. cleaning sam file and trying again"
          # the failed run may not have got as far as writing this file
          File.delete(sampled_bam) if File.exist?(sampled_bam)
          fix_problem_snap_output bamfile
          runner.run
          unless runner.status.success?
            abort "express failed on the cleaned sam file\n#{runner.stderr}"
          end
        end
        File.rename(ex_output, @fin_output)
      end
      sampled_bam
    end

    # Return the eXpress command line as a single string.
    #
    # Both input paths are expanded to absolute paths. The binary and the
    # paths are shell-escaped so that names containing spaces or shell
    # metacharacters survive; plain paths are left exactly as they were.
    def build_command assembly, bamfile
      [
        Shellwords.escape(@express),
        "--output-dir .",
        "--output-align-samp",
        "--no-update-check",
        "--additional-online 1",
        Shellwords.escape(File.expand_path(assembly)),
        Shellwords.escape(File.expand_path(bamfile))
      ].join(" ")
    end

    # Parse a tab-separated eXpress results file.
    #
    # Returns a hash of target (column 1) =>
    #   { :eff_len   => column 3 as Integer,
    #     :eff_count => column 7 as Float,
    #     :tpm       => column 14 as Float }
    # (columns counted from zero). The first line is treated as the header
    # and skipped, as are blank lines.
    def load_expression file
      expression = {}
      # File.foreach closes the file once iteration finishes
      File.foreach(file).with_index do |line, index|
        next if index.zero? # skip header line
        next if line.strip.empty?
        cols = line.chomp.split("\t")
        expression[cols[1]] = {
          :eff_len => cols[3].to_i,
          :eff_count => cols[7].to_f,
          :tpm => cols[14].to_f
        }
      end
      expression
    end

    # Rewrite +bam+ in place after dropping the read pairs SamChecker
    # rejects. Used when express fails, probably because of a temporary
    # snap error.
    #
    # The intermediate <name>.sam and <name>.fixed.sam files are written to
    # the current directory and are not removed. Returns +bam+.
    def fix_problem_snap_output bam
      # convert bam to sam
      sam = "#{File.expand_path(File.basename(bam, File.extname(bam)))}.sam"
      Samtools.run "view -h #{Shellwords.escape(bam)} > #{Shellwords.escape(sam)}"
      # run sam fixer on sam
      checker = SamChecker.new
      fixed_sam = "#{File.expand_path(File.basename(sam, File.extname(sam)))}.fixed.sam"
      checker.fix_sam(sam, fixed_sam)
      # convert sam to bam
      Samtools.run "view -bS #{Shellwords.escape(fixed_sam)} > #{Shellwords.escape(bam)}"
      bam
    end

  end # Express

end # Transrate
@@ -1,74 +0,0 @@
1
module Transrate

  # Filters a SAM file, dropping pairs of alignment lines in which either
  # line extends past the end of its reference sequence. Reference lengths
  # are taken from the @SQ header lines of the file being filtered.
  class SamChecker

    # CIGAR operations that advance the position on the reference
    REFERENCE_OPS = %w[M D N = X].freeze

    def initialize
      @contigs = {}   # reference name => length, filled in by #fix_sam
      @reference = ""
      @count = 0      # number of alignment lines seen by #fix_sam
      @percent = 0
      @first = true   # true when the next alignment line starts a new pair
    end

    # Decide whether a single SAM alignment line stays within its reference.
    #
    # sam - one tab-separated SAM alignment line
    #
    # The end position is the POS column plus the length of every
    # reference-consuming CIGAR operation. A soft clip that is the first
    # operation is also added, as in the original implementation
    # (NOTE(review): presumably compensating for how the aligner reports
    # POS -- confirm). Hard clips and padding are ignored.
    #
    # Returns false when the alignment ends beyond the reference length,
    # true otherwise. Lines whose reference has no @SQ entry (for example
    # unmapped reads with "*") cannot overrun anything and return true.
    def check sam
      cols = sam.split("\t")
      length = @contigs[cols[2]]
      return true if length.nil?

      position = cols[3].to_i
      # pairs in the form [ ["10", "M"], ["1", "D"] ]
      operations = cols[5].to_s.scan(/(\d+)([MIDNS=X])/)
      operations.each_with_index do |(count, op), i|
        if REFERENCE_OPS.include?(op) || (i == 0 && op == "S")
          position += count.to_i
        end
      end
      position <= length + 1
    end

    # Copy +input+ to +output+, keeping every header line and only those
    # pairs of consecutive alignment lines where both pass #check.
    #
    # input  - path of the SAM file to read
    # output - path of the SAM file to write (overwritten)
    #
    # Alignment lines are taken two at a time in file order, so the input is
    # expected to list the two lines of a pair next to each other. A trailing
    # unpaired line is dropped.
    def fix_sam input, output
      @first = true # never pair across separate calls
      sam1 = ""
      File.open(output, "wb") do |out|
        # File.foreach closes the input once iteration finishes
        File.foreach(input) do |sam|
          if sam.start_with?("@")
            # header
            record_contig(sam) if sam.start_with?("@SQ")
            out.write sam
          else
            # alignment
            if @first
              sam1 = sam.dup
              @first = false
            else
              if check(sam1) && check(sam)
                out.write(sam1)
                out.write(sam)
              end
              @first = true
            end
            @count += 1
          end
        end
      end
    end

    private

    # Store the length of the reference described by an @SQ header line, e.g.
    # @SQ SN:Locus_1_Transcript_13/342_Confidence_1.000_Length_1605 LN:1605
    # The SN and LN fields are located by tag, so their order is irrelevant.
    def record_contig header
      name = nil
      length = nil
      header.chomp.split("\t").each do |field|
        if field.start_with?("SN:")
          name = field[3..-1]
        elsif field.start_with?("LN:")
          length = field[3..-1].to_i
        end
      end
      @contigs[name] = length if name && length
    end

  end

end
@@ -1,146 +0,0 @@
1
module Transrate

  # Class-level helpers that build samtools command lines and run them
  # through Cmd. Most helpers skip the work when their output file already
  # exists.
  class Samtools

    require 'shellwords'

    class SamtoolsError < StandardError; end

    # Get the path to the samtools binary, as reported by `which samtools`.
    # The result is cached after the first successful lookup.
    #
    # Raises SamtoolsError when `which samtools` does not succeed.
    def self.path
      return @path if @path
      which_samtools = Cmd.new("which samtools")
      which_samtools.run
      unless which_samtools.status.success?
        raise SamtoolsError.new("could not find samtools in the path")
      end
      @path = which_samtools.stdout.split("\n").first
    end

    # Run a samtools command.
    #
    # cmd - everything that follows the binary name, e.g. "view -bS in > out"
    #
    # Returns the command's stdout. Raises SamtoolsError, including stderr
    # and stdout in the message, when the command does not succeed.
    def self.run cmd
      runcmd = Cmd.new "#{Samtools.path} #{cmd}"
      runcmd.run
      unless runcmd.status.success?
        raise SamtoolsError.new("Samtools command failed: #{runcmd}" +
                                "\n#{runcmd.stderr}" +
                                "\n#{runcmd.stdout}")
      end
      runcmd.stdout
    end

    # Convert a sam file to a bam file, returning the path to the bamfile.
    # The bam file is created in the current directory.
    def self.sam_to_bam samfile
      bamfile = File.expand_path(File.basename(samfile, '.sam') + '.bam')
      unless File.exist?(bamfile)
        Samtools.run "view -bS #{shell_path samfile} > #{shell_path bamfile}"
      end
      bamfile
    end

    # Sort a bam file, returning the path to the sorted bamfile
    # (<name>.sorted.bam in the current directory).
    def self.sort_bam bamfile, threads=4
      sort_with_suffix bamfile, '.sorted', threads
    end

    # Sort a bam file by readname only, returning the path to the
    # sorted bamfile (<name>.readsorted.bam in the current directory).
    def self.readsort_bam bamfile, threads=4
      sort_with_suffix bamfile, '.readsorted', threads, ['-n']
    end

    # Index a bamfile, returning the path to the index
    # (<name>.bai in the current directory).
    def self.index_bam bamfile
      index = File.expand_path(File.basename(bamfile, '.bam') + '.bai')
      Samtools.run "index #{shell_path bamfile} #{shell_path index}"
      index
    end

    # Convert a sam file to bam, sort it and index the sorted bam, returning
    # an array of paths to the bamfile, sorted bamfile and index respectively.
    def self.sam_to_sorted_indexed_bam samfile
      bamfile = Samtools.sam_to_bam samfile
      sorted = Samtools.sort_bam bamfile
      # `samtools index` needs coordinate-sorted input, so the sorted file is
      # the one to index rather than the freshly converted one
      index = Samtools.index_bam sorted
      [bamfile, sorted, index]
    end

    # Calculate per-base coverage from a sorted, indexed bam file.
    #
    # bam - an object responding to #fasta (reference path) and #bam (bam path)
    #
    # Returns the path to the coverage file (<fasta name>.coverage in the
    # current directory).
    def self.coverage bam
      outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
      unless File.exist?(outfile)
        Samtools.run [
          "mpileup",
          "-f #{shell_path bam.fasta}", # reference
          "-B",                         # don't calculate BAQ quality scores
          "-Q0",                        # include all reads ignoring quality
          "-I",                         # don't do genotype calculations
          shell_path(bam.bam),          # the bam file
          "> #{shell_path outfile}"
        ].join(" ")
      end
      outfile
    end

    # Pipe `samtools mpileup` on a sorted, indexed bam file into
    # `bcftools view`. Return the path to the resulting file
    # (<fasta name>.bcf in the current directory).
    #
    # Raises RuntimeError when the pipeline does not succeed.
    def self.bam_to_bcf(bam, fasta)
      outfile = File.expand_path "#{File.basename(fasta)}.bcf"
      unless File.exist?(outfile)
        mpileup = Cmd.new [
          "#{Samtools.path} mpileup",
          "-f #{shell_path fasta}",   # reference
          "-B",                       # don't calculate BAQ quality scores
          "-q0",                      # include all multimapping reads
          "-Q0",                      # include all reads ignoring quality
          "-I",                       # don't do genotype calculations
          "-u",                       # output uncompressed bcf format
          shell_path(bam),            # the bam file
          "| bcftools view -cg -",
          "> #{shell_path outfile}"
        ].join(" ")
        mpileup.run
        unless mpileup.status.success?
          raise RuntimeError.new("samtools and bcftools failed")
        end
      end
      outfile
    end

    # Merge two bam files into +out+, returning +out+. The three paths are
    # passed to samtools exactly as given.
    def self.merge_bam left, right, out, threads=1
      Samtools.run "merge -@ #{threads} #{out} #{left} #{right}"
      out
    end

    # Absolute, shell-escaped form of +path+; plain paths come back
    # unchanged apart from being made absolute.
    def self.shell_path path
      Shellwords.escape(File.expand_path(path))
    end
    private_class_method :shell_path

    # Shared implementation of sort_bam and readsort_bam.
    #
    # the sort command behaves inconsistently with the other commands:
    # it takes an output prefix rather than a filename
    # and automatically adds the .bam extension
    # NOTE(review): this is the legacy prefix form of `samtools sort`;
    # confirm it matches the samtools version in use.
    def self.sort_with_suffix bamfile, suffix, threads, flags=[]
      sorted = File.basename(bamfile, '.bam') + suffix
      unless File.exist?("#{sorted}.bam")
        Samtools.run [
          "sort",
          "-@ #{threads}",
          *flags,
          shell_path(bamfile),
          Shellwords.escape(sorted)
        ].join(" ")
      end
      File.expand_path(sorted + '.bam')
    end
    private_class_method :sort_with_suffix

  end

end
@@ -1,5 +0,0 @@
1
- bundle_id target_id length eff_length tot_counts uniq_counts est_counts eff_counts ambig_distr_alpha ambig_distr_beta fpkm fpkm_conf_low fpkm_conf_high solvable tpm
2
- 1 C291600 261 54.369218 10 10 10.000000 48.005105 0.000000e+00 0.000000e+00 5.410108e+00 5.410108e+00 5.410108e+00 T 5.417487e+00
3
- 2 C196710 138 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
4
- 3 C378763 1364 1096.889202 195 195 195.000000 242.485749 0.000000e+00 0.000000e+00 5.229148e+00 5.183800e+00 5.274496e+00 T 5.236279e+00
5
- 4 C132376 100 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
data/test/test_express.rb DELETED
@@ -1,22 +0,0 @@
1
- require 'helper'
2
- require 'tmpdir'
3
-
4
# Tests for Transrate::Express.
class TestExpress < Test::Unit::TestCase

  context "Express" do

    # Parses the express_results.xprs fixture in the data directory and
    # checks the number of entries and the values stored for the first
    # target. Express.new is called, so the express binary must be
    # discoverable with `which`.
    should "load an expression file" do
      fixture = File.join(File.dirname(__FILE__), 'data',
                          'express_results.xprs')
      loaded = Transrate::Express.new.load_expression(fixture)
      first_target = loaded['C291600']

      assert_equal 4, loaded.size, "should be four results loaded"
      assert_equal 54, first_target[:eff_len], "eff length is wrong"
      assert_equal 48.005105, first_target[:eff_count],
                   "eff count is wrong"
      assert_equal 5.417487e+00, first_target[:tpm], "tpm is wrong"
    end

  end

end
@@ -1,22 +0,0 @@
1
- require 'helper'
2
-
3
# Tests for Transrate::Samtools. Both tests call the real samtools binary.
class TestSamtools < Test::Unit::TestCase

  context "samtools" do

    # Running the located binary with no arguments should print a banner
    # containing "Program: samtools".
    should "know the path to samtools binary" do
      msg = /Program: samtools/
      path = Transrate::Samtools.path
      res = `#{path} 2>&1`.split("\n").join
      assert msg =~ res
    end

    # Converts the tiny.sam fixture to tiny.bam in the working directory.
    should "run commands" do
      sam = File.join(File.dirname(__FILE__), 'data', 'tiny.sam')
      begin
        Transrate::Samtools.run "view -bS #{sam} > tiny.bam"
        # NOTE(review): an exact byte count presumably depends on the
        # samtools build that wrote the file -- confirm it is stable
        assert_equal 460, File.size('tiny.bam'), 'bam file should be created'
      ensure
        # remove the output even when the command or the assertion fails
        File.delete 'tiny.bam' if File.exist?('tiny.bam')
      end
    end

  end
end