transrate 0.3.1 → 1.0.0.alpha.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +7 -0
- data/README.md +7 -6
- data/bin/transrate +21 -9
- data/deps/deps.yaml +49 -14
- data/ext/transrate/transrate.c +200 -166
- data/lib/transrate.rb +2 -3
- data/lib/transrate/assembly.rb +0 -49
- data/lib/transrate/cmd.rb +4 -0
- data/lib/transrate/comparative_metrics.rb +16 -64
- data/lib/transrate/contig.rb +57 -18
- data/lib/transrate/express.rb +79 -0
- data/lib/transrate/read_metrics.rb +196 -185
- data/lib/transrate/samtools.rb +88 -16
- data/lib/transrate/snap.rb +123 -0
- data/lib/transrate/transrater.rb +16 -19
- data/lib/transrate/version.rb +4 -4
- data/test/data/bridging_reads.l.fastq +20 -0
- data/test/data/bridging_reads.r.fastq +20 -0
- data/test/test_bin.rb +50 -21
- data/test/test_comp_metrics.rb +3 -27
- data/test/test_contig.rb +8 -0
- data/test/test_inline.rb +1 -1
- data/test/test_read_metrics.rb +108 -19
- data/test/test_transrater.rb +5 -5
- data/transrate.gemspec +2 -5
- metadata +66 -129
- data/lib/transrate/bowtie2.rb +0 -75
- data/lib/transrate/dimension_reduce.rb +0 -18
- data/lib/transrate/metric.rb +0 -16
- data/test/test_bowtie.rb +0 -66
data/lib/transrate/samtools.rb
CHANGED
@@ -1,20 +1,31 @@
|
|
1
|
-
require 'bio-samtools'
|
2
|
-
|
3
1
|
module Transrate
|
4
2
|
|
5
3
|
class Samtools
|
6
4
|
|
5
|
+
class SamtoolsError < StandardError; end
|
6
|
+
|
7
7
|
# Get the path to the samtools binary by looking it up on the
|
8
8
|
# shell's PATH (via `which samtools`)
|
9
9
|
# The lookup runs at most once per process: the first successful result is
# memoised in @path and returned directly on later calls.
#
# @return [String, nil] first line printed by `which samtools`
# @raise [SamtoolsError] when `which samtools` exits unsuccessfully
def self.path
  return @path if @path
  locator = Cmd.new("which samtools")
  locator.run
  unless locator.status.success?
    raise SamtoolsError, "could not find samtools in the path"
  end
  # `which` may print several lines; only the first one is kept
  @path = locator.stdout.split("\n").first
end
|
13
20
|
|
14
21
|
# Run a samtools command
|
15
22
|
# @param cmd [String] samtools sub-command and its arguments
# @return the stdout captured by the Cmd wrapper
def self.run(cmd)
  job = Cmd.new("#{Samtools.path} #{cmd}")
  job.run
  unless job.status.success?
    # a failed command is only logged, never raised: the caller still
    # receives whatever was written to stdout
    logger.warn("Samtools command failed: #{job}\n#{job.stderr}")
  end
  job.stdout
end
|
20
31
|
|
@@ -22,20 +33,45 @@ module Transrate
|
|
22
33
|
# Convert a sam file to bam. The bam file is written to the current working
# directory and named after the sam file; an existing bam file is reused.
#
# @param samfile [String] path to the sam file
# @return [String] absolute path to the bam file
def self.sam_to_bam(samfile)
  target = File.expand_path("#{File.basename(samfile, '.sam')}.bam")
  unless File.exist?(target)
    source = File.expand_path(samfile)
    Samtools.run("view -bS #{source} > #{target}")
  end
  target
end
|
28
41
|
|
29
42
|
# Sort a bam file, returning the path to the sorted bamfile
|
30
|
-
def self.sort_bam bamfile
|
43
|
+
# The sorted file is written to the current working directory; if it
# already exists the sort is skipped and the existing file is reused.
#
# @param bamfile [String] path to the bam file to sort
# @param threads [Integer] number of threads samtools may use
# @return [String] absolute path to "<basename>.sorted.bam"
def self.sort_bam(bamfile, threads=4)
  # the sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    # -@ is the thread-count option (as used by merge_bam); -l, which was
    # passed here before, selects the compression level instead
    Samtools.run "sort -@ #{threads} #{File.expand_path bamfile} #{sorted}"
  end
  File.expand_path("#{sorted}.bam")
end
|
56
|
+
|
57
|
+
# Sort a bam file by readname only, returning the path to the
|
58
|
+
# sorted bamfile
|
59
|
+
# The sorted file is written to the current working directory; if it
# already exists the sort is skipped and the existing file is reused.
# NOTE(review): the output name is the same "<basename>.sorted.bam" that
# sort_bam produces, so a coordinate-sorted file left by sort_bam would be
# reused here as if it were name-sorted - confirm callers never mix the two.
#
# @param bamfile [String] path to the bam file to sort
# @param threads [Integer] number of threads samtools may use
# @return [String] absolute path to "<basename>.sorted.bam"
def self.readsort_bam(bamfile, threads=4)
  # the sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    cmd = "sort"
    # -@ is the thread-count option (as used by merge_bam); -l, which was
    # passed here before, selects the compression level instead
    cmd << " -@ #{threads}"
    cmd << " -n" # sort by read name only
    cmd << " #{File.expand_path bamfile} #{sorted}"
    Samtools.run cmd
  end
  File.expand_path("#{sorted}.bam")
end
|
38
73
|
|
74
|
+
|
39
75
|
# Index a bamfile, returning the path to the index
|
40
76
|
def self.index_bam bamfile
|
41
77
|
index = File.basename(bamfile, '.bam') + '.bai'
|
@@ -57,17 +93,53 @@ module Transrate
|
|
57
93
|
# return the path to the coverage file
|
58
94
|
# Runs `samtools mpileup` and redirects its output to
# "<fasta basename>.coverage" in the current working directory. An
# existing coverage file is reused without re-running samtools.
#
# @param bam an object responding to #fasta (reference path) and #bam
#   (bam file path)
# @return [String] absolute path to the coverage file
def self.coverage(bam)
  outfile = File.expand_path("#{File.basename(bam.fasta)}.coverage")
  return outfile if File.exist?(outfile)
  args = [
    "mpileup",
    "-f #{File.expand_path bam.fasta}", # reference
    "-B",                               # disable BAQ computation
    "-Q0",                              # no minimum base quality
    "-I",                               # skip indel calling
    File.expand_path(bam.bam),          # the bam file
    "> #{outfile}"
  ]
  Samtools.run args.join(" ")
  outfile
end
|
108
|
+
|
109
|
+
# Pile up the reads of a bam file against the reference with samtools
|
110
|
+
# mpileup and pipe the result through bcftools. Return the path to the .bcf file.
|
111
|
+
# The output is "<fasta basename>.bcf" in the current working directory;
# an existing file is reused. Unlike the other helpers this one calls the
# `samtools` found by the shell directly (not Samtools.path) because the
# command is a pipeline into `bcftools`.
#
# @param bam [String] path to the bam file
# @param fasta [String] path to the reference fasta
# @return [String] absolute path to the bcf file
# @raise [RuntimeError] when the samtools | bcftools pipeline fails
def self.bam_to_bcf(bam, fasta)
  outfile = File.expand_path("#{File.basename(fasta)}.bcf")
  return outfile if File.exist?(outfile)
  # mpileup flags:
  #   -f reference, -B disable BAQ computation, -q0 no minimum mapping
  #   quality, -Q0 no minimum base quality, -I skip indel calling,
  #   -u uncompressed bcf output
  pileup = "samtools mpileup -f #{File.expand_path fasta}" \
           " -B -q0 -Q0 -I -u #{File.expand_path bam}"
  # the doubled space before '>' reproduces the command string exactly
  # as it was assembled previously
  pipeline = "#{pileup} | bcftools view -cg -  > #{outfile}"
  job = Cmd.new(pipeline)
  job.run
  raise RuntimeError, "samtools and bcftools failed" unless job.status.success?
  outfile
end
|
70
132
|
|
133
|
+
# Merge two bam files into one with `samtools merge`.
#
# @param left [String] path to the first input bam file
# @param right [String] path to the second input bam file
# @param out [String] path of the merged bam file to create
# @param threads [Integer] value passed to samtools' -@ option
# @return [String] the `out` path, unchanged
def self.merge_bam(left, right, out, threads=1)
  Samtools.run("merge -@ #{threads} #{out} #{left} #{right}")
  out
end
|
142
|
+
|
71
143
|
end
|
72
144
|
|
73
145
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Transrate

  # Raised when the snap executable cannot be found, when a snap command
  # exits unsuccessfully, or when reads are mapped before an index exists.
  class SnapError < StandardError
  end

  # Wrapper around the `snap` aligner command line: builds an index for a
  # fasta file, maps paired reads against it and keeps track of the number
  # of reads reported by the run.
  class Snap

    attr_reader :index_name, :sam, :read_count

    # Locate the snap executable with `which snap`.
    #
    # @raise [SnapError] when snap is not on the PATH
    def initialize
      which_snap = Cmd.new('which snap')
      which_snap.run
      unless which_snap.status.success?
        raise SnapError, "could not find snap in the path"
      end
      @snap = which_snap.stdout.split("\n").first

      @index_built = false
      @index_name = ""
    end

    # Assemble the `snap paired` command line.
    #
    # @param l [String] comma-separated list of left read files
    # @param r [String] comma-separated list of right read files
    # @param threads [Integer] value for snap's -t option
    # @return [String] the complete command line (output goes to @bam,
    #   which map_reads sets before calling this method)
    def build_paired_cmd l, r, threads
      cmd = "#{@snap} paired #{@index_name}"
      # left and right files are paired up by position in their lists
      l.split(",").zip(r.split(",")).each do |left, right|
        cmd << " #{left} #{right}"
      end
      # NOTE: do NOT turn on the -so flag (sort bam output)
      # it violates the basic assumption of eXpress's streaming
      # algorithm: that the fragments are observed in approximately
      # random order.
      cmd << " -o #{@bam}"
      cmd << " -s 0 1000" # min and max distance between paired-read starts
      cmd << " -H 300000" # max seed hits to consider in paired mode
      cmd << " -h 2000" # max seed hits to consider when reverting to single
      cmd << " -I" # ignore read IDs
      cmd << " -d 30" # max edit distance (function of read length?)
      cmd << " -t #{threads}"
      cmd << " -b" # bind threads to cores
      cmd << " -M" # format cigar string
      cmd << " -sa" # keep all alignments, don't discard 0x100
      # cmd << " -C++" # trim low-quality bases from front and back of reads
      cmd
    end

    # Map paired reads against the previously built index. The bam file is
    # written to the current working directory; if it already exists snap
    # is not run again and the read count is loaded instead.
    #
    # @param file unused by this method; kept for interface compatibility
    # @param left [String] comma-separated list of left read files
    # @param right [String] comma-separated list of right read files
    # @param insertsize unused by this method
    # @param insertsd unused by this method
    # @param outputname unused by this method
    # @param threads [Integer] number of threads for snap
    # @return [String] absolute path to the bam file
    # @raise [SnapError] if no index has been built or snap fails
    def map_reads(file, left, right, insertsize: 200,
                  insertsd: 50, outputname: nil, threads: 8)
      raise SnapError, "Index not built" unless @index_built

      lbase = File.basename(left.split(",").first)
      rbase = File.basename(right.split(",").first)
      index = File.basename(@index_name)
      @bam = File.expand_path("#{lbase}.#{rbase}.#{index}.bam")
      @read_count_file = "#{lbase}-#{rbase}-read_count.txt"

      # File.exist? rather than the File.exists? alias, which no longer
      # exists in Ruby 3.2 and later
      if File.exist?(@bam)
        load_readcount left
      else
        runner = Cmd.new(build_paired_cmd(left, right, threads))
        runner.run
        unless runner.status.success?
          raise SnapError, "Snap failed\n#{runner.stderr}"
        end
        # only parse the statistics of a run that actually succeeded
        save_readcount runner.stdout
      end
      @bam
    end

    # Extract the read count from snap's stdout and cache it on disk.
    #
    # The statistics row is recognised by its first two columns being
    # "2000" and "30" - presumably echoing the -h 2000 and -d 30 options
    # set in build_paired_cmd (confirm against snap's output format).
    # Column 9 of that row is halved, presumably converting a count of
    # individual reads into a count of pairs - TODO confirm.
    #
    # @param stdout [String] everything snap printed to stdout
    def save_readcount stdout
      stdout.split("\n").each do |line|
        cols = line.split(/\s+/)
        next unless cols[0] == "2000" && cols[1] == "30"
        @read_count = cols[8].to_i / 2
        write_readcount
      end
    end

    # Restore the read count for an existing bam file: from the cache file
    # when present, otherwise by counting lines in the read files with
    # `wc -l` and dividing by four (four lines per fastq record).
    #
    # @param reads [String] comma-separated list of read files
    def load_readcount reads
      @read_count = 0
      if File.exist?(@read_count_file)
        @read_count = File.read(@read_count_file).to_i
      else
        counted = false
        reads.split(",").each do |l|
          count = Cmd.new("wc -l #{l}")
          count.run
          if count.status.success?
            @read_count += count.stdout.strip.split(/\s+/).first.to_i / 4
            counted = true
          else
            logger.warn "couldn't get number of reads from #{l}"
          end
        end
        # written once after the loop; the cache ends up with the same
        # total it had when it was rewritten after every file
        write_readcount if counted
      end
    end

    # Build a snap index for a fasta file in a directory named after the
    # file (without extension) in the current working directory. An
    # existing directory of that name is taken to be a finished index.
    #
    # @param file [String] path to the fasta file to index
    # @param threads [Integer] number of threads for snap
    # @raise [SnapError] when `snap index` exits unsuccessfully
    def build_index file, threads
      @index_name = File.basename(file, File.extname(file))
      # Dir.exist? rather than the Dir.exists? alias, which no longer
      # exists in Ruby 3.2 and later
      unless Dir.exist?(@index_name)
        cmd = "#{@snap} index #{file} #{@index_name}"
        cmd << " -s 23"
        cmd << " -t#{threads}"
        cmd << " -bSpace" # contig name terminates with space char
        runner = Cmd.new cmd
        runner.run
        unless runner.status.success?
          raise SnapError, "Failed to build Snap index\n#{runner.stderr}"
        end
      end
      @index_built = true
    end

    private

    # Persist @read_count (followed by a newline) to @read_count_file.
    def write_readcount
      File.open(@read_count_file, "wb") do |out|
        out.write("#{@read_count}\n")
      end
    end

  end # Snap

end # Transrate
|
data/lib/transrate/transrater.rb
CHANGED
@@ -22,10 +22,8 @@ module Transrate
|
|
22
22
|
# @param right [String] path to the right reads
|
23
23
|
# @param insertsize [Integer] mean insert size of the read pairs
|
24
24
|
# @param insertsd [Integer] standard deviation of the read pair insert size
|
25
|
-
def initialize(assembly, reference,
|
26
|
-
|
27
|
-
insertsize: nil, insertsd: nil,
|
28
|
-
threads: 1)
|
25
|
+
def initialize(assembly, reference, left: nil, right: nil,
|
26
|
+
insertsize: nil, insertsd: nil, threads: 1)
|
29
27
|
if assembly
|
30
28
|
if assembly.is_a?(Assembly)
|
31
29
|
@assembly = assembly
|
@@ -64,23 +62,22 @@ module Transrate
|
|
64
62
|
comparative_metrics
|
65
63
|
end
|
66
64
|
|
67
|
-
#
|
68
|
-
|
69
|
-
|
70
|
-
|
65
|
+
# Calculate the geometric mean of an array of numbers
|
66
|
+
# Computed in log space (exp of the mean of the logs) so that a long list
# of small values does not underflow the way a running product would.
#
# @param x [Array<Numeric>] values to average
# @return [Float] the geometric mean; 0.0 for an empty list or when any
#   value is zero
# @raise [Math::DomainError] if a value is negative
def geomean(x)
  # an empty list used to divide by zero and yield NaN, which then
  # propagated into every score derived from it
  return 0.0 if x.empty?
  log_sum = x.inject(0.0) { |acc, v| acc + Math.log(v) }
  Math.exp(log_sum / x.size)
end
|
72
|
+
|
73
|
+
# Reduce all metrics for the assembly to a single quality score
|
74
|
+
# by taking the geometric mean of the scores for all contigs
|
75
|
+
# and multiplying it by the proportion of fragments whose most likely
|
76
|
+
# mapping is consistent with the assembly
|
71
77
|
# @return [Float] the assembly score
|
72
78
|
def assembly_score
|
73
|
-
@score
|
74
|
-
|
75
|
-
pg = Metric.new('pg', @read_metrics.pr_good_mapping, 0.0)
|
76
|
-
end
|
77
|
-
if @comparative_metrics.has_run
|
78
|
-
rc = Metric.new('rc', @comparative_metrics.reference_coverage, 0.0)
|
79
|
-
end
|
80
|
-
if (pg && rc)
|
81
|
-
@score = DimensionReduce.dimension_reduce([pg, rc])
|
82
|
-
end
|
83
|
-
return @score
|
79
|
+
@score = geomean assembly.assembly.values.map{ |contig| contig.score }
|
80
|
+
return @score * @read_metrics.p_good_mapping
|
84
81
|
end
|
85
82
|
|
86
83
|
def assembly_metrics
|
data/lib/transrate/version.rb
CHANGED
@@ -8,10 +8,10 @@ module Transrate
|
|
8
8
|
# the Gem. Versions must be incremented in accordance with
|
9
9
|
# Semantic Versioning 2.0 (http://semver.org/).
|
10
10
|
module VERSION
|
11
|
-
MAJOR =
|
12
|
-
MINOR =
|
13
|
-
PATCH =
|
14
|
-
BUILD =
|
11
|
+
MAJOR = 1
|
12
|
+
MINOR = 0
|
13
|
+
PATCH = 0
|
14
|
+
BUILD = 'alpha.1'
|
15
15
|
|
16
16
|
STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
|
17
17
|
end
|
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
|
18
18
|
ACGACGCGCACCACATCAACAAGCACAACTTCCGTGTCCCCTTCGTCTGTGGCTGCCGTGACCTTGGTGAGGCGCTCCGGAGGGTCCGTGAGGGCGCCGC
|
19
19
|
+
|
20
20
|
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
21
|
+
@FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/1
|
22
|
+
GTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGCTAAGCGCATCGCCG
|
23
|
+
+
|
24
|
+
_[_eeeeegebe`efddgaeehhhhfgfQ^eb`adfgfffhZ_db\adb`aaabaWQT]`b_b]Y_b`cbbb`]`][]^]aaX^bbdbbY^[_a_[[O][
|
25
|
+
@FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/1
|
26
|
+
GGCCACGCCTGCTGATGCCGCGCTCATGATGCAGCTTGGGTGCGACGGCGTCTTCGTCGGCTC
|
27
|
+
+
|
28
|
+
bbbeeeeef`gcghcegdghfhfdfdfdghhfgfhhhhhgG_Z`__`c[_RZa^baX^aaZ_a
|
29
|
+
@FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/1
|
30
|
+
CTCATGTCTCTCCATCAGTAATAACGGGACTGAATCAATGGTAAGAAACAAAGCTATGGTACTATGCAGAACCTCTTTATTTTT
|
31
|
+
+
|
32
|
+
__^cc`\cecabcdea_d[`b^[abaUeW^a_e[S^afS^^Y^acbfedeb]^cacefh_V\W\efbddYcd`deV\aRV^ccc
|
33
|
+
@FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/1
|
34
|
+
TATGATTTGGTGATGCAGACCAAGCAGCTGGGCCGCCTCCCTGTTGTGCAGTTCGCGGCCGGGGGTGTGGCCACGCCTGCTGATGCCGCGCTCATGATGC
|
35
|
+
+
|
36
|
+
__[cccceggbeeddghhdhhhhhdhhfbfhhhhhhhhhhhhhghfefg_fdcgbdaabaaa_[]LEO^^^``[X_^^`ab`]]_bbac_X]XEYR_b]_
|
37
|
+
@FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/1
|
38
|
+
GTTCAGGCTGTCACCCACTACAGCGACCCTACCATCCTCGCCGACGTCAGCGCCGGACTCGGGGAGGCCATGGTCGGCATCAACCTCAACGACCCTA
|
39
|
+
+
|
40
|
+
b_beeeeeggggfiihhihihifhiiiiiiihfhefhiihiiiiigeedcdcca^cacccacccccc_acbbbS]][_[ab_`X`]`bcc[[X_a^b
|
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
|
18
18
|
GGACCGGCAACATCGTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGC
|
19
19
|
+
|
20
20
|
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
21
|
+
@FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/2
|
22
|
+
CCCGGCCGCGAACTGCACAACAGGGAGGCGGCCCAGCTGCTTGGTCTGCATCACCAAATCATACGGCGCGGCGATGCGCT
|
23
|
+
+
|
24
|
+
___cccecec`eghfRc`efgfhaeefgiicccc^aX_]Y`bbcYbW`_XX]bbbbccccb`bbaa]_TTXTZTXT^[[_
|
25
|
+
@FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/2
|
26
|
+
TCGCTGTAGTGGGTGACAGCCTGAACGATGGCACGCGCGCGGCGAGCAGGGTCGCCACTCTTGAAGATTCCCGAGCCGACGAAG
|
27
|
+
+
|
28
|
+
___ea`e`cgegf^b`bbdgfffhdhhfhgheihfhihhiS\[RKWTL[`YX^_acccccccbcccbbc_abcR]]aXT[_]EQ
|
29
|
+
@FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/2
|
30
|
+
CATAAGCAGGGCTGGCAGAGCGACAGACATACAAACCGTTCAACATTTGATTGAGTACATGATAAACCAGGGGAGCCTAGTCTAACTTAATACAGTAGCT
|
31
|
+
+
|
32
|
+
^_accacagggabbfa]_f`Z_cgfYcZ_[aeggiihaeaeggghgdg_g__gghVb^``d`bdcaced^`__RW^^bbbcbcc]_]]b[]c]_`SY`YY
|
33
|
+
@FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/2
|
34
|
+
GTTGATGCCGACCATGGCCTCCCCGAGTCCGGCGCTGACGTCGGCGAGGATGG
|
35
|
+
+
|
36
|
+
_a_e`ccaeeegeafffhh[ffghd_cY^XZefff_d[`gGXKXW]_aaTX_^
|
37
|
+
@FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/2
|
38
|
+
GGCAGATGGATGGATGGGTGATACAAATATATGAGAGAAGATGACGATGGTGGAGCGGATAGGGTTGTTCACTCGGATCTGGCGGCGTATCGC
|
39
|
+
+
|
40
|
+
a_beeceegggggfghih`ffbgghhhffbffbfgfgfhdfdffghfghigegdgfhi_addeeZabbbbcbcbcaZ^ac[bbacXaTT^^a_
|
data/test/test_bin.rb
CHANGED
@@ -10,48 +10,47 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
10
10
|
end
|
11
11
|
|
12
12
|
teardown do
|
13
|
-
files = ["150uncovered.l.fq.150uncovered.r.fq.
|
14
|
-
"150uncovered.l.fq.150uncovered.r.fq.
|
15
|
-
"
|
16
|
-
"150uncovered.l.fq.150uncovered.r.fq.assembly.2.sorted.bam",
|
17
|
-
"assembly.2.1.bt2", "assembly.2.2.bt2", "assembly.2.3.bt2",
|
18
|
-
"assembly.2.4.bt2", "assembly.2.fa.coverage",
|
13
|
+
files = ["150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam.bai",
|
14
|
+
"150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam",
|
15
|
+
"sorghum_transcript_into_Os.protein.2.1.blast",
|
19
16
|
"assembly.2_into_Os.protein.2.1.blast",
|
20
|
-
"
|
21
|
-
"
|
22
|
-
"
|
17
|
+
"sorghum_transcript.nhr", "sorghum_transcript.nin", "sorghum_transcript.nsq",
|
18
|
+
"Os.protein.2_into_sorghum_transcript.2.blast",
|
19
|
+
"assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
|
23
20
|
"Os.protein.2.phr", "Os.protein.2.pin", "Os.protein.2.psq",
|
24
|
-
"
|
25
|
-
"
|
26
|
-
"
|
21
|
+
"transrate_assemblies.csv", "params.xprs",
|
22
|
+
"sorghum_transcript.fa_results.xprs",
|
23
|
+
"sorghum_transcript.fa_bam_info.csv",
|
24
|
+
"transrate_sorghum_transcript.fa_contigs.csv",
|
25
|
+
"150uncovered.l.fq-150uncovered.r.fq-read_count.txt",
|
26
|
+
"150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.merged.sorted.bam"]
|
27
27
|
files.each do |file|
|
28
28
|
File.delete(file) if File.exist?(file)
|
29
29
|
end
|
30
|
+
`rm -rf sorghum_transcript`
|
30
31
|
end
|
31
32
|
|
32
33
|
should "run help" do
|
33
34
|
c=Transrate::Cmd.new("bundle exec bin/transrate --help")
|
34
35
|
c.run
|
35
|
-
|
36
|
+
assert c.stdout =~ /DESCRIPTION/
|
36
37
|
assert_equal true, c.status.success?, "exit status"
|
37
38
|
end
|
38
39
|
|
39
40
|
should "fail on non existent assembly files" do
|
40
41
|
c=Transrate::Cmd.new("bundle exec bin/transrate --assembly foo.fasta")
|
41
42
|
c.run
|
42
|
-
assert_equal 163, c.stderr.length, "stderr"
|
43
43
|
assert_equal false, c.status.success?, "exit success"
|
44
44
|
end
|
45
45
|
|
46
46
|
should "fail on non existent reference files" do
|
47
47
|
c=Transrate::Cmd.new("bundle exec bin/transrate --reference foo.fasta")
|
48
48
|
c.run
|
49
|
-
assert_equal 104, c.stderr.length, "error"
|
50
49
|
assert_equal false, c.status.success?, "exit status"
|
51
50
|
end
|
52
51
|
|
53
52
|
should "run on test data" do
|
54
|
-
assembly = File.join(File.dirname(__FILE__), 'data', '
|
53
|
+
assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
|
55
54
|
reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
|
56
55
|
left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
|
57
56
|
right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
|
@@ -62,8 +61,9 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
62
61
|
c = Transrate::Cmd.new("#{cmd}")
|
63
62
|
c.run
|
64
63
|
assert_equal true, c.status.success?, "exit status"
|
65
|
-
assert File.exist?("transrate_assemblies.csv"), "csv file doesn't
|
66
|
-
assert File.exist?("
|
64
|
+
assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exist"
|
65
|
+
assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
|
66
|
+
"contig csv file doesn't exist"
|
67
67
|
hash = {}
|
68
68
|
CSV.foreach("transrate_assemblies.csv", :headers => true,
|
69
69
|
:header_converters => :symbol,
|
@@ -74,9 +74,38 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
74
74
|
hash[header]=field
|
75
75
|
end
|
76
76
|
end
|
77
|
-
assert_equal
|
78
|
-
assert_equal
|
79
|
-
assert_equal
|
77
|
+
assert_equal 1555, hash[:n_bases], "number of bases"
|
78
|
+
assert_equal 823, hash[:n50], "n50"
|
79
|
+
assert_equal 0, hash[:n_refs_with_crbb], "number of crb hits"
|
80
|
+
end
|
81
|
+
|
82
|
+
should "run on test data with comma separated list of fastq files" do
|
83
|
+
assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
|
84
|
+
left = []
|
85
|
+
right = []
|
86
|
+
left << File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
|
87
|
+
left << File.join(File.dirname(__FILE__), 'data', 'bridging_reads.l.fastq')
|
88
|
+
right << File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
|
89
|
+
right << File.join(File.dirname(__FILE__), 'data', 'bridging_reads.r.fastq')
|
90
|
+
cmd = "bundle exec bin/transrate --assembly #{assembly}"
|
91
|
+
cmd << " --left #{left.join(",")}"
|
92
|
+
cmd << " --right #{right.join(",")}"
|
93
|
+
c = Transrate::Cmd.new("#{cmd}")
|
94
|
+
c.run
|
95
|
+
assert_equal true, c.status.success?, "exit status"
|
96
|
+
assert File.exist?("transrate_assemblies.csv")
|
97
|
+
assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
|
98
|
+
"contig csv file doesn't exist"
|
99
|
+
hash = {}
|
100
|
+
CSV.foreach("transrate_assemblies.csv", :headers => true,
|
101
|
+
:header_converters => :symbol,
|
102
|
+
:converters => :all) do |row|
|
103
|
+
row.headers.zip(row.fields).each do |header, field|
|
104
|
+
hash[header]=field
|
105
|
+
end
|
106
|
+
end
|
107
|
+
assert_equal 1555, hash[:n_bases], "number of bases"
|
108
|
+
assert_equal 823, hash[:n50], "n50"
|
80
109
|
end
|
81
110
|
|
82
111
|
should "fail when one of multiple assemblies is missing" do
|