transrate 1.0.0.beta1 → 1.0.0.beta2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +8 -0
- data/CITATION +3 -0
- data/README.md +1 -1
- data/Rakefile +71 -0
- data/bin/transrate +92 -41
- data/deps/blast.yaml +27 -0
- data/deps/deps.yaml +36 -62
- data/ext/transrate/transrate.c +9 -0
- data/lib/transrate/assembly.rb +21 -11
- data/lib/transrate/comparative_metrics.rb +2 -2
- data/lib/transrate/contig.rb +8 -3
- data/lib/transrate/read_metrics.rb +22 -62
- data/lib/transrate/salmon.rb +67 -0
- data/lib/transrate/snap.rb +4 -32
- data/lib/transrate/transrater.rb +1 -1
- data/lib/transrate/version.rb +1 -1
- data/lib/transrate.rb +18 -15
- data/test/data/sorghum_100.fa +200 -0
- data/test/data/test.sf +30 -0
- data/test/helper.rb +13 -0
- data/test/test_assembly.rb +54 -0
- data/test/test_bin.rb +30 -27
- data/test/test_cmd.rb +5 -0
- data/test/test_contig.rb +9 -14
- data/test/test_read_metrics.rb +66 -42
- data/test/test_salmon.rb +33 -0
- data/test/test_snap.rb +27 -0
- data/test/test_transrater.rb +10 -10
- data/transrate.gemspec +1 -1
- metadata +14 -12
- data/lib/transrate/express.rb +0 -102
- data/lib/transrate/sam_checker.rb +0 -74
- data/lib/transrate/samtools.rb +0 -146
- data/test/data/express_results.xprs +0 -5
- data/test/test_express.rb +0 -22
- data/test/test_samtools.rb +0 -22
data/lib/transrate/express.rb
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
|
2
|
-
module Transrate
|
3
|
-
|
4
|
-
class ExpressError < StandardError
|
5
|
-
end
|
6
|
-
|
7
|
-
class Express
|
8
|
-
|
9
|
-
require 'ostruct'
|
10
|
-
|
11
|
-
attr_reader :fin_output
|
12
|
-
|
13
|
-
# return an Express object
|
14
|
-
def initialize
|
15
|
-
which = Cmd.new('which express')
|
16
|
-
which.run
|
17
|
-
if !which.status.success?
|
18
|
-
raise ExpressError.new("could not find express in the path")
|
19
|
-
end
|
20
|
-
@express = which.stdout.split("\n").first
|
21
|
-
end
|
22
|
-
|
23
|
-
# return struct containing:
|
24
|
-
# results_file => path to the express results TSV
|
25
|
-
# expression => a hash of target => effective_count
|
26
|
-
# align_samp => path to the sampled alignments file
|
27
|
-
def run assembly, bamfile
|
28
|
-
assembly = assembly.file if assembly.is_a? Assembly
|
29
|
-
|
30
|
-
ex_output = 'results.xprs'
|
31
|
-
@fin_output = "#{File.basename assembly}_#{ex_output}"
|
32
|
-
|
33
|
-
unless File.exists? @fin_output
|
34
|
-
runner = Cmd.new build_command(assembly, bamfile)
|
35
|
-
runner.run
|
36
|
-
unless runner.status.success?
|
37
|
-
logger.warn "express failed. cleaning sam file and trying again"
|
38
|
-
File.delete("hits.1.samp.bam")
|
39
|
-
fix_problem_snap_output bamfile
|
40
|
-
runner.run
|
41
|
-
unless runner.status.success?
|
42
|
-
abort "express failed on the cleaned sam file\n#{runner.stderr}"
|
43
|
-
end
|
44
|
-
end
|
45
|
-
File.rename(ex_output, @fin_output)
|
46
|
-
end
|
47
|
-
return 'hits.1.samp.bam'
|
48
|
-
end
|
49
|
-
|
50
|
-
# return the constructed eXpress command
|
51
|
-
def build_command assembly, bamfile
|
52
|
-
cmd = "#{@express}"
|
53
|
-
cmd << " --output-dir ."
|
54
|
-
cmd << " --output-align-samp"
|
55
|
-
cmd << " --no-update-check"
|
56
|
-
cmd << " --additional-online 1"
|
57
|
-
cmd << " #{File.expand_path assembly}"
|
58
|
-
cmd << " #{File.expand_path bamfile}"
|
59
|
-
cmd
|
60
|
-
end
|
61
|
-
|
62
|
-
# return a hash of target => effective_count created
|
63
|
-
# by parsing the results file
|
64
|
-
def load_expression file
|
65
|
-
expression = {}
|
66
|
-
first = true
|
67
|
-
File.open(file).each do |line|
|
68
|
-
if first # skip header line
|
69
|
-
first = false
|
70
|
-
next
|
71
|
-
end
|
72
|
-
line = line.chomp.split("\t")
|
73
|
-
target = line[1]
|
74
|
-
effective_length = line[3]
|
75
|
-
effective_count = line[7]
|
76
|
-
tpm = line[14]
|
77
|
-
expression[target] = {
|
78
|
-
:eff_len => effective_length.to_i,
|
79
|
-
:eff_count => effective_count.to_f,
|
80
|
-
:tpm => tpm.to_f
|
81
|
-
}
|
82
|
-
end
|
83
|
-
expression
|
84
|
-
end
|
85
|
-
|
86
|
-
def fix_problem_snap_output bam
|
87
|
-
# express failed, probably because of temporary snap error
|
88
|
-
# convert bam to sam
|
89
|
-
sam = "#{File.expand_path(File.basename(bam, File.extname(bam)))}.sam"
|
90
|
-
Samtools.run "view -h #{bam} > #{sam}"
|
91
|
-
# run sam fixer on sam
|
92
|
-
checker = SamChecker.new
|
93
|
-
fixed_sam = "#{File.expand_path(File.basename(sam, File.extname(sam)))}.fixed.sam"
|
94
|
-
checker.fix_sam(sam, fixed_sam)
|
95
|
-
# convert sam to bam
|
96
|
-
Samtools.run "view -bS #{fixed_sam} > #{bam}"
|
97
|
-
bam
|
98
|
-
end
|
99
|
-
|
100
|
-
end # Express
|
101
|
-
|
102
|
-
end # Transrate
|
@@ -1,74 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
class SamChecker
|
4
|
-
|
5
|
-
def initialize
|
6
|
-
@contigs = {}
|
7
|
-
@reference = ""
|
8
|
-
@count = 0
|
9
|
-
@percent = 0
|
10
|
-
@first = true
|
11
|
-
end
|
12
|
-
|
13
|
-
def check sam
|
14
|
-
cols = sam.split("\t")
|
15
|
-
|
16
|
-
reference = cols[2]
|
17
|
-
length = @contigs[reference]
|
18
|
-
|
19
|
-
seq_length = cols[9].length
|
20
|
-
position = cols[3].to_i
|
21
|
-
cigar = cols[5]
|
22
|
-
# this generates a list of pairs in the form [ ["10", "M"], ["1", "D"] ]
|
23
|
-
list = cigar.split(/[MDIS]/).zip(cigar.scan(/[MDIS]/))
|
24
|
-
list.each_with_index do |a, i|
|
25
|
-
c=a[0].to_i
|
26
|
-
t=a[1]
|
27
|
-
if t=="M" or t=="D"
|
28
|
-
position += c
|
29
|
-
elsif i==0 and t=="S"
|
30
|
-
position += c
|
31
|
-
end
|
32
|
-
end
|
33
|
-
if position > length + 1
|
34
|
-
return false
|
35
|
-
else
|
36
|
-
return true
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def fix_sam input, output
|
41
|
-
sam1 = ""
|
42
|
-
File.open("#{output}", "wb") do |out|
|
43
|
-
File.open("#{input}").each_line do |sam|
|
44
|
-
if sam =~ /^@/
|
45
|
-
# header
|
46
|
-
# @SQ SN:Locus_1_Transcript_13/342_Confidence_1.000_Length_1605 LN:1605
|
47
|
-
if sam[0..2]=="@SQ"
|
48
|
-
cols = sam.split("\t")
|
49
|
-
name = cols[1][3..-1]
|
50
|
-
length = cols[2][3..-1].to_i
|
51
|
-
@contigs[name] = length
|
52
|
-
end
|
53
|
-
out.write sam
|
54
|
-
else
|
55
|
-
# alignment
|
56
|
-
if @first
|
57
|
-
sam1 = sam.dup
|
58
|
-
@first = false
|
59
|
-
else
|
60
|
-
if check(sam1) and check(sam)
|
61
|
-
out.write(sam1)
|
62
|
-
out.write(sam)
|
63
|
-
end
|
64
|
-
@first = true
|
65
|
-
end
|
66
|
-
@count+=1
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
end
|
71
|
-
|
72
|
-
end
|
73
|
-
|
74
|
-
end
|
data/lib/transrate/samtools.rb
DELETED
@@ -1,146 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
class Samtools
|
4
|
-
|
5
|
-
class SamtoolsError < StandardError; end
|
6
|
-
|
7
|
-
# Get the path to the samtools binary built when bio-samtools
|
8
|
-
# was installed
|
9
|
-
def self.path
|
10
|
-
if !@path
|
11
|
-
which_samtools = Cmd.new("which samtools")
|
12
|
-
which_samtools.run
|
13
|
-
if !which_samtools.status.success?
|
14
|
-
raise SamtoolsError.new("could not find samtools in the path")
|
15
|
-
end
|
16
|
-
@path = which_samtools.stdout.split("\n").first
|
17
|
-
end
|
18
|
-
return @path
|
19
|
-
end
|
20
|
-
|
21
|
-
# Run a samtools command
|
22
|
-
def self.run cmd
|
23
|
-
runcmd = Cmd.new "#{Samtools.path} #{cmd}"
|
24
|
-
runcmd.run
|
25
|
-
if !runcmd.status.success?
|
26
|
-
raise SamtoolsError.new("Samtools command failed: #{runcmd}" +
|
27
|
-
"\n#{runcmd.stderr}" +
|
28
|
-
"\n#{runcmd.stdout}")
|
29
|
-
end
|
30
|
-
runcmd.stdout
|
31
|
-
end
|
32
|
-
|
33
|
-
# Convert a sam file to a bam file, returning the path to the bamfile
|
34
|
-
def self.sam_to_bam samfile
|
35
|
-
bamfile = File.basename(samfile, '.sam') + '.bam'
|
36
|
-
bamfile = File.expand_path bamfile
|
37
|
-
if !File.exist?(bamfile)
|
38
|
-
Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
|
39
|
-
end
|
40
|
-
bamfile
|
41
|
-
end
|
42
|
-
|
43
|
-
# Sort a bam file, returning the path to the sorted bamfile
|
44
|
-
def self.sort_bam bamfile, threads=4
|
45
|
-
# the sort command behaves inconsistently with the other commands:
|
46
|
-
# it takes an output prefix rather than a filename
|
47
|
-
# and automatically adds the .bam extension
|
48
|
-
sorted = File.basename(bamfile, '.bam') + '.sorted'
|
49
|
-
if !File.exist?("#{sorted}.bam")
|
50
|
-
cmd = "sort"
|
51
|
-
cmd << " -@ #{threads}"
|
52
|
-
cmd << " #{File.expand_path bamfile} #{sorted}"
|
53
|
-
Samtools.run cmd
|
54
|
-
end
|
55
|
-
File.expand_path(sorted + '.bam')
|
56
|
-
end
|
57
|
-
|
58
|
-
# Sort a bam file by readname only, returning the path to th
|
59
|
-
# sorted bamfile
|
60
|
-
def self.readsort_bam bamfile, threads=4
|
61
|
-
# the sort command behaves inconsistently with the other commands:
|
62
|
-
# it takes an output prefix rather than a filename
|
63
|
-
# and automatically adds the .bam extension
|
64
|
-
sorted = File.basename(bamfile, '.bam') + '.readsorted'
|
65
|
-
if !File.exist?("#{sorted}.bam")
|
66
|
-
cmd = "sort"
|
67
|
-
cmd << " -@ #{threads}"
|
68
|
-
cmd << " -n" # sort by read name only
|
69
|
-
cmd << " #{File.expand_path bamfile} #{sorted}"
|
70
|
-
Samtools.run cmd
|
71
|
-
end
|
72
|
-
File.expand_path(sorted + '.bam')
|
73
|
-
end
|
74
|
-
|
75
|
-
|
76
|
-
# Index a bamfile, returning the path to the index
|
77
|
-
def self.index_bam bamfile
|
78
|
-
index = File.basename(bamfile, '.bam') + '.bai'
|
79
|
-
index = File.expand_path index
|
80
|
-
Samtools.run "index #{File.expand_path bamfile} #{index}"
|
81
|
-
index
|
82
|
-
end
|
83
|
-
|
84
|
-
# Convert a sam file to bam, sort and index the bam, returning
|
85
|
-
# an array of paths to the bamfile, sorted bamfile and index respectively
|
86
|
-
def self.sam_to_sorted_indexed_bam samfile
|
87
|
-
bamfile = Samtools.sam_to_bam samfile
|
88
|
-
sorted = Samtools.sort_bam bamfile
|
89
|
-
index = Samtools.index_bam bamfile
|
90
|
-
[bamfile, sorted, index]
|
91
|
-
end
|
92
|
-
|
93
|
-
# Calculate per-base coverage from a sorted, indexed bam file
|
94
|
-
# return the path to the coverage file
|
95
|
-
def self.coverage bam
|
96
|
-
outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
|
97
|
-
if !File.exist?(outfile)
|
98
|
-
cmd = "mpileup"
|
99
|
-
cmd += " -f #{File.expand_path bam.fasta}" # reference
|
100
|
-
cmd += " -B" # don't calculate BAQ quality scores
|
101
|
-
cmd += " -Q0" # include all reads ignoring quality
|
102
|
-
cmd += " -I" # don't do genotype calculations
|
103
|
-
cmd += " #{File.expand_path bam.bam}" # the bam file
|
104
|
-
cmd += " > #{outfile}"
|
105
|
-
Samtools.run cmd
|
106
|
-
end
|
107
|
-
outfile
|
108
|
-
end
|
109
|
-
|
110
|
-
# Calculate per-base coverage and mapQ score from a sorted, indexed
|
111
|
-
# bam file. Return the path to the coverage file.
|
112
|
-
def self.bam_to_bcf(bam, fasta)
|
113
|
-
outfile = File.expand_path "#{File.basename(fasta)}.bcf"
|
114
|
-
if !File.exist?(outfile)
|
115
|
-
cmd = "samtools mpileup"
|
116
|
-
cmd << " -f #{File.expand_path fasta}" # reference
|
117
|
-
cmd << " -B" # don't calculate BAQ quality scores
|
118
|
-
cmd << " -q0" # include all multimapping reads
|
119
|
-
cmd << " -Q0" # include all reads ignoring quality
|
120
|
-
cmd << " -I" # don't do genotype calculations
|
121
|
-
cmd << " -u" # output uncompressed bcf format
|
122
|
-
cmd << " #{File.expand_path bam}" # the bam file
|
123
|
-
cmd << " | bcftools view -cg - "
|
124
|
-
cmd << " > #{outfile}"
|
125
|
-
mpileup = Cmd.new cmd
|
126
|
-
mpileup.run
|
127
|
-
if !mpileup.status.success?
|
128
|
-
raise RuntimeError.new("samtools and bcftools failed")
|
129
|
-
end
|
130
|
-
end
|
131
|
-
outfile
|
132
|
-
end
|
133
|
-
|
134
|
-
def self.merge_bam left, right, out, threads=1
|
135
|
-
cmd = "merge"
|
136
|
-
cmd << " -@ #{threads}"
|
137
|
-
cmd << " #{out}"
|
138
|
-
cmd << " #{left}"
|
139
|
-
cmd << " #{right}"
|
140
|
-
Samtools.run cmd
|
141
|
-
out
|
142
|
-
end
|
143
|
-
|
144
|
-
end
|
145
|
-
|
146
|
-
end
|
@@ -1,5 +0,0 @@
|
|
1
|
-
bundle_id target_id length eff_length tot_counts uniq_counts est_counts eff_counts ambig_distr_alpha ambig_distr_beta fpkm fpkm_conf_low fpkm_conf_high solvable tpm
|
2
|
-
1 C291600 261 54.369218 10 10 10.000000 48.005105 0.000000e+00 0.000000e+00 5.410108e+00 5.410108e+00 5.410108e+00 T 5.417487e+00
|
3
|
-
2 C196710 138 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
|
4
|
-
3 C378763 1364 1096.889202 195 195 195.000000 242.485749 0.000000e+00 0.000000e+00 5.229148e+00 5.183800e+00 5.274496e+00 T 5.236279e+00
|
5
|
-
4 C132376 100 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
|
data/test/test_express.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
require 'tmpdir'
|
3
|
-
|
4
|
-
class TestExpress < Test::Unit::TestCase
|
5
|
-
|
6
|
-
context "Express" do
|
7
|
-
|
8
|
-
should "load an expression file" do
|
9
|
-
file = File.join(File.dirname(__FILE__), 'data',
|
10
|
-
'express_results.xprs')
|
11
|
-
e = Transrate::Express.new
|
12
|
-
results = e.load_expression file
|
13
|
-
assert_equal 4, results.size, "should be four results loaded"
|
14
|
-
assert_equal 54, results['C291600'][:eff_len], "eff length is wrong"
|
15
|
-
assert_equal 48.005105, results['C291600'][:eff_count],
|
16
|
-
"eff count is wrong"
|
17
|
-
assert_equal 5.417487e+00, results['C291600'][:tpm], "tpm is wrong"
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
data/test/test_samtools.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
class TestSamtools < Test::Unit::TestCase
|
4
|
-
|
5
|
-
context "samtools" do
|
6
|
-
|
7
|
-
should "know the path to samtools binary" do
|
8
|
-
msg = /Program: samtools/
|
9
|
-
path = Transrate::Samtools.path
|
10
|
-
res = `#{path} 2>&1`.split("\n").join
|
11
|
-
assert msg =~ res
|
12
|
-
end
|
13
|
-
|
14
|
-
should "run commands" do
|
15
|
-
sam = File.join(File.dirname(__FILE__), 'data', 'tiny.sam')
|
16
|
-
Transrate::Samtools.run "view -bS #{sam} > tiny.bam"
|
17
|
-
assert_equal 460, File.size('tiny.bam'), 'bam file should be created'
|
18
|
-
File.delete 'tiny.bam'
|
19
|
-
end
|
20
|
-
|
21
|
-
end
|
22
|
-
end
|