transrate 1.0.0.beta1 → 1.0.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,102 +0,0 @@
1
-
2
- module Transrate
3
-
4
# Error raised by Express#initialize when `which express` does not succeed.
- class ExpressError < StandardError
5
- end
6
-
7
# Thin wrapper around the eXpress command-line tool: locates the binary,
# runs it against an assembly and a BAM file, and parses its results table.
class Express

  require 'ostruct'
  require 'shellwords'

  # File name eXpress writes its results table to inside the output dir.
  RESULTS_FILE = 'results.xprs'
  # File name returned by #run as the sampled-alignments BAM.
  SAMPLED_BAM = 'hits.1.samp.bam'

  # Name of the per-assembly results file chosen by the most recent #run.
  attr_reader :fin_output

  # Locate the express binary with `which`.
  #
  # Raises ExpressError when `which express` does not succeed.
  def initialize
    which = Cmd.new('which express')
    which.run
    unless which.status.success?
      raise ExpressError, "could not find express in the path"
    end
    @express = which.stdout.split("\n").first
  end

  # Run eXpress for +assembly+ (a path, or an Assembly whose #file is used)
  # against +bamfile+, writing into the current directory.
  #
  # The results table is renamed to "<assembly basename>_results.xprs"
  # (available afterwards as #fin_output). If that file already exists
  # eXpress is not run again. On failure the BAM is cleaned once with
  # #fix_problem_snap_output and eXpress is retried; a second failure
  # aborts the process.
  #
  # Returns the name of the sampled-alignments file ('hits.1.samp.bam').
  def run(assembly, bamfile)
    assembly = assembly.file if assembly.is_a? Assembly
    @fin_output = "#{File.basename assembly}_#{RESULTS_FILE}"

    unless File.exist? @fin_output
      runner = Cmd.new build_command(assembly, bamfile)
      runner.run
      unless runner.status.success?
        # NOTE(review): `logger` is not defined in this class; it is
        # assumed to be provided elsewhere in the project.
        logger.warn "express failed. cleaning sam file and trying again"
        # eXpress may have died before writing any sampled alignments,
        # so only remove the file when it is actually there.
        File.delete(SAMPLED_BAM) if File.exist?(SAMPLED_BAM)
        fix_problem_snap_output bamfile
        runner.run
        unless runner.status.success?
          abort "express failed on the cleaned sam file\n#{runner.stderr}"
        end
      end
      File.rename(RESULTS_FILE, @fin_output)
    end
    SAMPLED_BAM
  end

  # Build the eXpress command line for +assembly+ and +bamfile+.
  # Both paths are made absolute and shell-escaped so that paths
  # containing spaces or shell metacharacters survive intact.
  #
  # Returns the command as a String.
  def build_command(assembly, bamfile)
    [
      @express,
      '--output-dir .',
      '--output-align-samp',
      '--no-update-check',
      '--additional-online 1',
      Shellwords.escape(File.expand_path(assembly)),
      Shellwords.escape(File.expand_path(bamfile))
    ].join(' ')
  end

  # Parse an eXpress results table (tab-separated, one header line).
  #
  # Returns a Hash of target id (column 1) =>
  #   { :eff_len   => Integer (column 3, truncated),
  #     :eff_count => Float   (column 7),
  #     :tpm       => Float   (column 14) }
  def load_expression(file)
    expression = {}
    # File.foreach closes the file once iteration finishes.
    File.foreach(file).with_index do |line, index|
      next if index.zero? # skip header line
      cols = line.chomp.split("\t")
      next if cols.empty? # blank line, e.g. at the end of the file
      expression[cols[1]] = {
        :eff_len => cols[3].to_i,
        :eff_count => cols[7].to_f,
        :tpm => cols[14].to_f
      }
    end
    expression
  end

  # Rewrite +bam+ in place without the read pairs SamChecker rejects.
  # Used after an eXpress failure, which the original author attributes to
  # a temporary SNAP output error.
  #
  # The intermediate "<name>.sam" and "<name>.fixed.sam" files are written
  # to the current directory and left there.
  #
  # Returns +bam+.
  def fix_problem_snap_output(bam)
    stem = File.expand_path(File.basename(bam, File.extname(bam)))
    sam = "#{stem}.sam"
    fixed_sam = "#{stem}.fixed.sam"
    # convert bam to sam
    Samtools.run "view -h #{Shellwords.escape bam} > #{Shellwords.escape sam}"
    # run sam fixer on sam
    SamChecker.new.fix_sam(sam, fixed_sam)
    # convert sam back to bam, overwriting the original
    Samtools.run "view -bS #{Shellwords.escape fixed_sam} > #{Shellwords.escape bam}"
    bam
  end

end # Express
101
-
102
- end # Transrate
@@ -1,74 +0,0 @@
1
- module Transrate
2
-
3
# Filters a SAM file, dropping read pairs in which either alignment runs
# past the end of its reference sequence as declared in the @SQ header.
class SamChecker

  # CIGAR operations that advance the position along the reference.
  REFERENCE_CONSUMING_OPS = %w[M D N = X].freeze

  def initialize
    @contigs = {} # reference name => length, filled from @SQ header lines
    @count = 0    # number of alignment lines seen so far
  end

  # Check a single alignment line against the known reference lengths.
  #
  # sam - one tab-separated SAM alignment line; the reference name is read
  #       from column 2, the position from column 3 and the CIGAR string
  #       from column 5 (all zero-based).
  #
  # Returns false when the alignment extends beyond the end of its
  # reference, true otherwise. Alignments whose reference is not in the
  # header (e.g. unmapped reads with reference "*") cannot be out of bounds
  # and are accepted.
  def check(sam)
    cols = sam.split("\t")
    length = @contigs[cols[2]]
    return true if length.nil?

    position = cols[3].to_i
    clip_is_leading = true
    cols[5].to_s.scan(/(\d+)([MIDNSHP=X])/).each do |count, op|
      next if op == 'H' # hard clips carry no bases and may precede a soft clip
      # A leading soft clip is counted as well. This is kept from the
      # original implementation; presumably it reflects how the aligner
      # reports the position of clipped reads -- TODO confirm.
      if REFERENCE_CONSUMING_OPS.include?(op) || (op == 'S' && clip_is_leading)
        position += count.to_i
      end
      clip_is_leading = false
    end
    # position is now one past the last aligned base (1-based coordinates)
    position <= length + 1
  end

  # Copy +input+ (SAM) to +output+, keeping all header lines and only those
  # read pairs for which both alignments pass #check.
  #
  # Alignment lines are taken two at a time, so mates are assumed to be on
  # consecutive lines. A final alignment without a mate is dropped.
  def fix_sam(input, output)
    pending = nil # first mate of the pair currently being read
    File.open(output.to_s, "wb") do |out|
      # File.foreach closes the input once iteration finishes.
      File.foreach(input.to_s) do |sam|
        if sam.start_with?('@')
          record_contig(sam) if sam.start_with?('@SQ')
          out.write sam
        else
          if pending.nil?
            pending = sam
          else
            if check(pending) && check(sam)
              out.write(pending)
              out.write(sam)
            end
            pending = nil
          end
          @count += 1
        end
      end
    end
  end

  private

  # Remember the length of the reference described by an @SQ header line,
  # e.g. "@SQ\tSN:contig_1\tLN:1605". The SN and LN tags are looked up by
  # name, so their order on the line does not matter.
  def record_contig(header)
    tags = {}
    header.chomp.split("\t").drop(1).each do |field|
      key, value = field.split(':', 2)
      tags[key] = value
    end
    @contigs[tags['SN']] = tags['LN'].to_i if tags['SN'] && tags['LN']
  end

end
73
-
74
- end
@@ -1,146 +0,0 @@
1
- module Transrate
2
-
3
# Class-level helpers for running samtools through Cmd.
#
# All file arguments are shell-escaped before being placed on a command
# line, because the commands rely on shell redirection and pipes.
class Samtools

  require 'shellwords'

  class SamtoolsError < StandardError; end

  # Get the path to the samtools binary found on the PATH. The lookup is
  # done once and cached.
  #
  # Raises SamtoolsError when `which samtools` does not succeed.
  def self.path
    @path ||= begin
      which_samtools = Cmd.new("which samtools")
      which_samtools.run
      unless which_samtools.status.success?
        raise SamtoolsError, "could not find samtools in the path"
      end
      which_samtools.stdout.split("\n").first
    end
  end

  # Run a samtools command. +cmd+ is everything after the binary name and
  # is passed through as given (it may contain redirections).
  #
  # Returns the command's stdout.
  # Raises SamtoolsError when the command exits unsuccessfully.
  def self.run(cmd)
    runcmd = Cmd.new "#{Samtools.path} #{cmd}"
    runcmd.run
    unless runcmd.status.success?
      raise SamtoolsError, "Samtools command failed: #{runcmd}" +
                           "\n#{runcmd.stderr}" +
                           "\n#{runcmd.stdout}"
    end
    runcmd.stdout
  end

  # Convert a sam file to a bam file in the current directory, returning
  # the absolute path to the bamfile. Nothing is run if it already exists.
  def self.sam_to_bam(samfile)
    bamfile = File.expand_path(File.basename(samfile, '.sam') + '.bam')
    unless File.exist?(bamfile)
      Samtools.run "view -bS #{quote File.expand_path(samfile)} > #{quote bamfile}"
    end
    bamfile
  end

  # Sort a bam file, returning the path to the sorted bamfile
  # ("<name>.sorted.bam" in the current directory).
  def self.sort_bam(bamfile, threads=4)
    sort_with_suffix(bamfile, '.sorted', threads)
  end

  # Sort a bam file by readname only, returning the path to the
  # sorted bamfile ("<name>.readsorted.bam" in the current directory).
  def self.readsort_bam(bamfile, threads=4)
    sort_with_suffix(bamfile, '.readsorted', threads, ['-n'])
  end

  # Index a bamfile, returning the path to the index
  # ("<name>.bai" in the current directory).
  def self.index_bam(bamfile)
    index = File.expand_path(File.basename(bamfile, '.bam') + '.bai')
    Samtools.run "index #{quote File.expand_path(bamfile)} #{quote index}"
    index
  end

  # Convert a sam file to bam, sort the bam and index the sorted bam,
  # returning an array of paths to the bamfile, sorted bamfile and index
  # respectively. The index is built from the sorted file because that is
  # the file prepared for indexing by the sort step.
  def self.sam_to_sorted_indexed_bam(samfile)
    bamfile = Samtools.sam_to_bam samfile
    sorted = Samtools.sort_bam bamfile
    index = Samtools.index_bam sorted
    [bamfile, sorted, index]
  end

  # Calculate per-base coverage from a sorted, indexed bam file.
  # +bam+ must respond to #fasta (reference path) and #bam (bam path).
  # Return the path to the coverage file; nothing is run if it exists.
  def self.coverage(bam)
    outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
    unless File.exist?(outfile)
      Samtools.run [
        'mpileup',
        "-f #{quote File.expand_path(bam.fasta)}", # reference
        '-B',  # don't calculate BAQ quality scores
        '-Q0', # include all reads ignoring quality
        '-I',  # don't do genotype calculations
        quote(File.expand_path(bam.bam)), # the bam file
        "> #{quote outfile}"
      ].join(' ')
    end
    outfile
  end

  # Pipe `samtools mpileup` for +bam+ against +fasta+ into
  # `bcftools view -cg`, writing "<fasta basename>.bcf" in the current
  # directory. Return the path to that file; nothing is run if it exists.
  #
  # Raises RuntimeError when the pipeline exits unsuccessfully.
  def self.bam_to_bcf(bam, fasta)
    outfile = File.expand_path "#{File.basename(fasta)}.bcf"
    unless File.exist?(outfile)
      pipeline = [
        # use the located binary, as every other command here does
        "#{Samtools.path} mpileup",
        "-f #{quote File.expand_path(fasta)}", # reference
        '-B',  # don't calculate BAQ quality scores
        '-q0', # include all multimapping reads
        '-Q0', # include all reads ignoring quality
        '-I',  # don't do genotype calculations
        '-u',  # output uncompressed bcf format
        quote(File.expand_path(bam)), # the bam file
        '| bcftools view -cg -',
        "> #{quote outfile}"
      ].join(' ')
      mpileup = Cmd.new pipeline
      mpileup.run
      unless mpileup.status.success?
        raise RuntimeError, "samtools and bcftools failed"
      end
    end
    outfile
  end

  # Merge the bam files +left+ and +right+ into +out+, returning +out+.
  def self.merge_bam(left, right, out, threads=1)
    Samtools.run ['merge', "-@ #{threads}", quote(out), quote(left), quote(right)].join(' ')
    out
  end

  # Shared implementation of sort_bam / readsort_bam.
  #
  # The sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension.
  def self.sort_with_suffix(bamfile, suffix, threads, extra_flags = [])
    prefix = File.basename(bamfile, '.bam') + suffix
    unless File.exist?("#{prefix}.bam")
      Samtools.run [
        'sort',
        "-@ #{threads}",
        *extra_flags,
        quote(File.expand_path(bamfile)),
        quote(prefix)
      ].join(' ')
    end
    File.expand_path("#{prefix}.bam")
  end
  private_class_method :sort_with_suffix

  # Escape a path for safe inclusion in a shell command line.
  def self.quote(path)
    Shellwords.escape(path.to_s)
  end
  private_class_method :quote

end
145
-
146
- end
@@ -1,5 +0,0 @@
1
- bundle_id target_id length eff_length tot_counts uniq_counts est_counts eff_counts ambig_distr_alpha ambig_distr_beta fpkm fpkm_conf_low fpkm_conf_high solvable tpm
2
- 1 C291600 261 54.369218 10 10 10.000000 48.005105 0.000000e+00 0.000000e+00 5.410108e+00 5.410108e+00 5.410108e+00 T 5.417487e+00
3
- 2 C196710 138 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
4
- 3 C378763 1364 1096.889202 195 195 195.000000 242.485749 0.000000e+00 0.000000e+00 5.229148e+00 5.183800e+00 5.274496e+00 T 5.236279e+00
5
- 4 C132376 100 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
data/test/test_express.rb DELETED
@@ -1,22 +0,0 @@
1
- require 'helper'
2
- require 'tmpdir'
3
-
4
# Tests for Transrate::Express.
class TestExpress < Test::Unit::TestCase

  context "Express" do

    # Parses the bundled fixture (test/data/express_results.xprs) and checks
    # the values extracted for the first target.
    should "load an expression file" do
      file = File.join(File.dirname(__FILE__), 'data',
                       'express_results.xprs')
      e = Transrate::Express.new
      results = e.load_expression file
      assert_equal 4, results.size, "should be four results loaded"
      assert_equal 54, results['C291600'][:eff_len], "eff length is wrong"
      # floats are compared with a tolerance rather than for exact equality
      assert_in_delta 48.005105, results['C291600'][:eff_count], 1e-9,
                      "eff count is wrong"
      assert_in_delta 5.417487e+00, results['C291600'][:tpm], 1e-9,
                      "tpm is wrong"
    end

  end

end
@@ -1,22 +0,0 @@
1
- require 'helper'
2
-
3
# Tests for Transrate::Samtools. Both tests call the samtools binary that
# Transrate::Samtools.path locates.
class TestSamtools < Test::Unit::TestCase

  context "samtools" do

    # Running the located binary without arguments should print the
    # samtools usage banner.
    should "know the path to samtools binary" do
      path = Transrate::Samtools.path
      res = `#{path} 2>&1`.split("\n").join
      assert_match(/Program: samtools/, res)
    end

    # Converts the tiny.sam fixture to BAM in the current directory.
    should "run commands" do
      sam = File.join(File.dirname(__FILE__), 'data', 'tiny.sam')
      begin
        Transrate::Samtools.run "view -bS #{sam} > tiny.bam"
        assert_equal 460, File.size('tiny.bam'), 'bam file should be created'
      ensure
        # clean up even when the command or the assertion fails
        File.delete 'tiny.bam' if File.exist? 'tiny.bam'
      end
    end

  end
end