transrate 1.0.0.beta1 → 1.0.0.beta2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.travis.yml +8 -0
- data/CITATION +3 -0
- data/README.md +1 -1
- data/Rakefile +71 -0
- data/bin/transrate +92 -41
- data/deps/blast.yaml +27 -0
- data/deps/deps.yaml +36 -62
- data/ext/transrate/transrate.c +9 -0
- data/lib/transrate/assembly.rb +21 -11
- data/lib/transrate/comparative_metrics.rb +2 -2
- data/lib/transrate/contig.rb +8 -3
- data/lib/transrate/read_metrics.rb +22 -62
- data/lib/transrate/salmon.rb +67 -0
- data/lib/transrate/snap.rb +4 -32
- data/lib/transrate/transrater.rb +1 -1
- data/lib/transrate/version.rb +1 -1
- data/lib/transrate.rb +18 -15
- data/test/data/sorghum_100.fa +200 -0
- data/test/data/test.sf +30 -0
- data/test/helper.rb +13 -0
- data/test/test_assembly.rb +54 -0
- data/test/test_bin.rb +30 -27
- data/test/test_cmd.rb +5 -0
- data/test/test_contig.rb +9 -14
- data/test/test_read_metrics.rb +66 -42
- data/test/test_salmon.rb +33 -0
- data/test/test_snap.rb +27 -0
- data/test/test_transrater.rb +10 -10
- data/transrate.gemspec +1 -1
- metadata +14 -12
- data/lib/transrate/express.rb +0 -102
- data/lib/transrate/sam_checker.rb +0 -74
- data/lib/transrate/samtools.rb +0 -146
- data/test/data/express_results.xprs +0 -5
- data/test/test_express.rb +0 -22
- data/test/test_samtools.rb +0 -22
data/lib/transrate/express.rb
DELETED
@@ -1,102 +0,0 @@
|
|
1
|
-
|
2
|
-
module Transrate

  # Raised when the express binary cannot be found on the PATH.
  class ExpressError < StandardError
  end

  # Wrapper around the eXpress command-line program: locates the binary,
  # runs it on an assembly plus BAM file, and parses its results table.
  class Express

    require 'ostruct'

    # Name of the renamed results table set by the last call to #run.
    attr_reader :fin_output

    # Locate the express binary with `which`.
    #
    # Raises ExpressError when `which express` does not succeed.
    def initialize
      which = Cmd.new('which express')
      which.run
      unless which.status.success?
        raise ExpressError, "could not find express in the path"
      end
      @express = which.stdout.split("\n").first
    end

    # Run eXpress for +assembly+ (an Assembly or a path) against +bamfile+.
    #
    # The results table ('results.xprs') is renamed to
    # "<assembly basename>_results.xprs" and that name is stored in
    # #fin_output. If the renamed table already exists in the working
    # directory, eXpress is not run again.
    #
    # When the first attempt fails, the BAM file is cleaned with
    # #fix_problem_snap_output and eXpress is run once more; a second failure
    # aborts the process.
    #
    # Returns the name of the sampled alignments file, 'hits.1.samp.bam'.
    def run(assembly, bamfile)
      assembly = assembly.file if assembly.is_a? Assembly

      ex_output = 'results.xprs'
      sampled_bam = 'hits.1.samp.bam'
      @fin_output = "#{File.basename assembly}_#{ex_output}"

      # File.exists? was removed in Ruby 3.2; File.exist? works everywhere
      unless File.exist? @fin_output
        runner = Cmd.new build_command(assembly, bamfile)
        runner.run
        unless runner.status.success?
          # NOTE(review): `logger` is not defined in this file; it is
          # presumably supplied globally by the library - confirm.
          logger.warn "express failed. cleaning sam file and trying again"
          # the failed run may not have got as far as writing the sample file
          File.delete(sampled_bam) if File.exist?(sampled_bam)
          fix_problem_snap_output bamfile
          runner.run
          unless runner.status.success?
            abort "express failed on the cleaned sam file\n#{runner.stderr}"
          end
        end
        File.rename(ex_output, @fin_output)
      end
      sampled_bam
    end

    # Return the eXpress command line for +assembly+ and +bamfile+.
    # Both paths are expanded to absolute paths.
    def build_command(assembly, bamfile)
      [
        "#{@express}",
        "--output-dir .",
        "--output-align-samp",
        "--no-update-check",
        "--additional-online 1",
        File.expand_path(assembly),
        File.expand_path(bamfile)
      ].join(' ')
    end

    # Parse the tab-separated eXpress results +file+.
    #
    # The first line is treated as the header and skipped, as are blank lines.
    # Returns a hash of target id (column 2) => hash with
    #   :eff_len   => column 4 as an Integer
    #   :eff_count => column 8 as a Float
    #   :tpm       => column 15 as a Float
    def load_expression(file)
      expression = {}
      # File.foreach closes the file when iteration ends
      File.foreach(file).with_index do |line, index|
        next if index.zero? # header line
        next if line.strip.empty?
        fields = line.chomp.split("\t")
        expression[fields[1]] = {
          :eff_len => fields[3].to_i,
          :eff_count => fields[7].to_f,
          :tpm => fields[14].to_f
        }
      end
      expression
    end

    # Clean up a BAM file that eXpress refused to read (probably because of
    # a temporary snap error): convert it to SAM, filter it with SamChecker
    # and write the filtered alignments back over the original BAM.
    #
    # Returns +bam+.
    def fix_problem_snap_output(bam)
      # convert bam to sam
      sam = "#{File.expand_path(File.basename(bam, File.extname(bam)))}.sam"
      Samtools.run "view -h #{bam} > #{sam}"
      # run sam fixer on sam
      checker = SamChecker.new
      fixed_sam = "#{File.expand_path(File.basename(sam, File.extname(sam)))}.fixed.sam"
      checker.fix_sam(sam, fixed_sam)
      # convert sam to bam
      Samtools.run "view -bS #{fixed_sam} > #{bam}"
      bam
    end

  end # Express

end # Transrate
|
data/lib/transrate/sam_checker.rb
DELETED
@@ -1,74 +0,0 @@
|
|
1
|
-
module Transrate

  # Filters a SAM file, dropping pairs of alignment records in which either
  # record extends past the end of its reference contig. Contig lengths are
  # learned from the @SQ header lines of the file being filtered.
  class SamChecker

    # CIGAR operations that advance the position along the reference
    REFERENCE_OPS = %w[M D N = X].freeze

    # One CIGAR element: a count followed by an operation code
    CIGAR_ELEMENT = /(\d+)([MIDNSHP=X])/

    # Number of mandatory tab-separated fields in an alignment record
    MANDATORY_FIELDS = 11

    def initialize
      @contigs = {} # reference name => length, filled from @SQ headers
      @count = 0    # number of alignment records seen by #fix_sam
    end

    # Check a single alignment line.
    #
    # Starting from the POS column, the counts of all reference-consuming
    # CIGAR operations are added, plus the count of a soft clip when it is
    # the first CIGAR element. The record fails when the resulting position
    # is greater than the contig length + 1.
    #
    # Returns false for a record with fewer than 11 fields, and true for a
    # record whose reference has no known length (e.g. unmapped reads with
    # reference '*'), since there is nothing to overrun.
    def check(sam)
      cols = sam.chomp.split("\t")
      return false if cols.size < MANDATORY_FIELDS

      length = @contigs[cols[2]]
      return true if length.nil?

      position = cols[3].to_i
      cols[5].scan(CIGAR_ELEMENT).each_with_index do |(count, op), i|
        if REFERENCE_OPS.include?(op) || (i.zero? && op == "S")
          position += count.to_i
        end
      end
      position <= length + 1
    end

    # Copy the SAM file +input+ to +output+, keeping all header lines and
    # only those pairs of consecutive alignment records where both records
    # pass #check. Blank lines are ignored. If the file holds an odd number
    # of alignment records the final, unpaired record is not written.
    def fix_sam(input, output)
      mate = nil # first record of the pair currently being read
      File.open("#{output}", "wb") do |out|
        # File.foreach closes the input when iteration ends
        File.foreach("#{input}") do |sam|
          next if sam.strip.empty?
          if sam.start_with?("@")
            # header, e.g.
            # @SQ SN:Locus_1_Transcript_13/342_Confidence_1.000_Length_1605 LN:1605
            record_contig(sam) if sam.start_with?("@SQ")
            out.write sam
            next
          end

          @count += 1
          if mate.nil?
            mate = sam
          else
            if check(mate) && check(sam)
              out.write(mate)
              out.write(sam)
            end
            mate = nil
          end
        end
      end
    end

    private

    # Store the name (SN tag) and length (LN tag) from an @SQ header line.
    # Lines missing either tag are ignored.
    def record_contig(header)
      tags = header.chomp.split("\t")
      name = tags.find { |tag| tag.start_with?("SN:") }
      length = tags.find { |tag| tag.start_with?("LN:") }
      @contigs[name[3..-1]] = length[3..-1].to_i if name && length
    end

  end

end
|
data/lib/transrate/samtools.rb
DELETED
@@ -1,146 +0,0 @@
|
|
1
|
-
module Transrate

  # Class-level helpers for running the samtools command-line program.
  class Samtools

    # Raised when samtools cannot be found or a samtools command fails.
    class SamtoolsError < StandardError; end

    # Get the path to the samtools binary found on the PATH by
    # `which samtools`. The result is cached after the first lookup.
    #
    # Raises SamtoolsError when `which samtools` does not succeed.
    def self.path
      @path ||= begin
        which_samtools = Cmd.new("which samtools")
        which_samtools.run
        unless which_samtools.status.success?
          raise SamtoolsError, "could not find samtools in the path"
        end
        which_samtools.stdout.split("\n").first
      end
    end

    # Run a samtools command. +cmd+ is everything after the binary name.
    # Returns the command's stdout.
    #
    # Raises SamtoolsError when the command exits unsuccessfully.
    def self.run(cmd)
      runcmd = Cmd.new "#{Samtools.path} #{cmd}"
      runcmd.run
      unless runcmd.status.success?
        raise SamtoolsError, ["Samtools command failed: #{runcmd}",
                              runcmd.stderr,
                              runcmd.stdout].join("\n")
      end
      runcmd.stdout
    end

    # Convert a sam file to a bam file, returning the path to the bamfile.
    # The bam file is written to the working directory; the conversion is
    # skipped when it already exists.
    def self.sam_to_bam(samfile)
      bamfile = File.expand_path(File.basename(samfile, '.sam') + '.bam')
      unless File.exist?(bamfile)
        Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
      end
      bamfile
    end

    # Sort a bam file, returning the path to the sorted bamfile
    # ("<name>.sorted.bam" in the working directory).
    def self.sort_bam(bamfile, threads=4)
      sort_with(bamfile, '.sorted', threads)
    end

    # Sort a bam file by readname only, returning the path to the
    # sorted bamfile ("<name>.readsorted.bam" in the working directory).
    def self.readsort_bam(bamfile, threads=4)
      sort_with(bamfile, '.readsorted', threads, ['-n']) # -n: sort by read name
    end

    # Index a bamfile, returning the path to the index
    # ("<name>.bai" in the working directory).
    def self.index_bam(bamfile)
      index = File.expand_path(File.basename(bamfile, '.bam') + '.bai')
      Samtools.run "index #{File.expand_path bamfile} #{index}"
      index
    end

    # Convert a sam file to bam, sort the bam and index the sorted bam,
    # returning an array of paths to the bamfile, sorted bamfile and index
    # respectively.
    def self.sam_to_sorted_indexed_bam(samfile)
      bamfile = Samtools.sam_to_bam samfile
      sorted = Samtools.sort_bam bamfile
      # the index has to be built from the sorted file, not the unsorted one
      index = Samtools.index_bam sorted
      [bamfile, sorted, index]
    end

    # Calculate per-base coverage with mpileup and return the path to the
    # coverage file. +bam+ must respond to #fasta (the reference) and #bam
    # (the bam file). The command is skipped when the coverage file exists.
    def self.coverage(bam)
      outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
      unless File.exist?(outfile)
        cmd = [
          "mpileup",
          "-f #{File.expand_path bam.fasta}", # reference
          "-B",                               # don't calculate BAQ quality scores
          "-Q0",                              # include all reads ignoring quality
          "-I",                               # don't do genotype calculations
          File.expand_path(bam.bam),          # the bam file
          "> #{outfile}"
        ].join(' ')
        Samtools.run cmd
      end
      outfile
    end

    # Pipe `samtools mpileup` into `bcftools view` to produce a bcf file
    # for +bam+ against the reference +fasta+. Returns the path to the bcf
    # file; the command is skipped when that file already exists.
    #
    # Raises RuntimeError when the pipeline exits unsuccessfully.
    def self.bam_to_bcf(bam, fasta)
      outfile = File.expand_path "#{File.basename(fasta)}.bcf"
      unless File.exist?(outfile)
        cmd = [
          "#{Samtools.path} mpileup",     # same binary as every other command
          "-f #{File.expand_path fasta}", # reference
          "-B",                           # don't calculate BAQ quality scores
          "-q0",                          # include all multimapping reads
          "-Q0",                          # include all reads ignoring quality
          "-I",                           # don't do genotype calculations
          "-u",                           # output uncompressed bcf format
          File.expand_path(bam),          # the bam file
          "| bcftools view -cg -",
          "> #{outfile}"
        ].join(' ')
        mpileup = Cmd.new cmd
        mpileup.run
        unless mpileup.status.success?
          raise RuntimeError, "samtools and bcftools failed"
        end
      end
      outfile
    end

    # Merge the bam files +left+ and +right+ into +out+, returning +out+.
    def self.merge_bam(left, right, out, threads=1)
      Samtools.run ["merge", "-@ #{threads}", out, left, right].join(' ')
      out
    end

    # Shared implementation of the sort commands.
    #
    # the sort command behaves inconsistently with the other commands:
    # it takes an output prefix rather than a filename
    # and automatically adds the .bam extension
    def self.sort_with(bamfile, suffix, threads, flags = [])
      sorted = File.basename(bamfile, '.bam') + suffix
      unless File.exist?("#{sorted}.bam")
        args = ["sort", "-@ #{threads}", *flags,
                File.expand_path(bamfile), sorted]
        Samtools.run args.join(' ')
      end
      File.expand_path(sorted + '.bam')
    end
    private_class_method :sort_with

  end

end
|
data/test/data/express_results.xprs
DELETED
@@ -1,5 +0,0 @@
|
|
1
|
-
bundle_id target_id length eff_length tot_counts uniq_counts est_counts eff_counts ambig_distr_alpha ambig_distr_beta fpkm fpkm_conf_low fpkm_conf_high solvable tpm
|
2
|
-
1 C291600 261 54.369218 10 10 10.000000 48.005105 0.000000e+00 0.000000e+00 5.410108e+00 5.410108e+00 5.410108e+00 T 5.417487e+00
|
3
|
-
2 C196710 138 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
|
4
|
-
3 C378763 1364 1096.889202 195 195 195.000000 242.485749 0.000000e+00 0.000000e+00 5.229148e+00 5.183800e+00 5.274496e+00 T 5.236279e+00
|
5
|
-
4 C132376 100 0.000000 0 0 0.000000 0.000000 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 F 0.000000e+00
|
data/test/test_express.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
require 'tmpdir'
|
3
|
-
|
4
|
-
# Tests for Transrate::Express.
class TestExpress < Test::Unit::TestCase

  context "Express" do

    # Parses the bundled express_results.xprs fixture and checks the row
    # count and the values loaded for target C291600.
    should "load an expression file" do
      file = File.join(File.dirname(__FILE__), 'data',
                       'express_results.xprs')
      e = Transrate::Express.new
      results = e.load_expression file
      assert_equal 4, results.size, "should be four results loaded"
      assert_equal 54, results['C291600'][:eff_len], "eff length is wrong"
      # floats are compared with a tolerance rather than for exact equality
      assert_in_delta 48.005105, results['C291600'][:eff_count], 1e-9,
                      "eff count is wrong"
      assert_in_delta 5.417487e+00, results['C291600'][:tpm], 1e-9,
                      "tpm is wrong"
    end

  end

end
|
data/test/test_samtools.rb
DELETED
@@ -1,22 +0,0 @@
|
|
1
|
-
require 'helper'
|
2
|
-
|
3
|
-
# Tests for Transrate::Samtools.
class TestSamtools < Test::Unit::TestCase

  context "samtools" do

    # Running the located binary with no arguments should print the
    # samtools usage banner.
    should "know the path to samtools binary" do
      path = Transrate::Samtools.path
      res = `#{path} 2>&1`.split("\n").join
      assert_match(/Program: samtools/, res)
    end

    # Converts the tiny.sam fixture to bam in the working directory.
    should "run commands" do
      sam = File.join(File.dirname(__FILE__), 'data', 'tiny.sam')
      bam = 'tiny.bam'
      begin
        Transrate::Samtools.run "view -bS #{sam} > #{bam}"
        assert_equal 460, File.size(bam), 'bam file should be created'
      ensure
        # remove the output even when the command or the assertion fails
        File.delete(bam) if File.exist?(bam)
      end
    end

  end
end
|