transrate 0.3.1 → 1.0.0.alpha.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +7 -0
- data/README.md +7 -6
- data/bin/transrate +21 -9
- data/deps/deps.yaml +49 -14
- data/ext/transrate/transrate.c +200 -166
- data/lib/transrate.rb +2 -3
- data/lib/transrate/assembly.rb +0 -49
- data/lib/transrate/cmd.rb +4 -0
- data/lib/transrate/comparative_metrics.rb +16 -64
- data/lib/transrate/contig.rb +57 -18
- data/lib/transrate/express.rb +79 -0
- data/lib/transrate/read_metrics.rb +196 -185
- data/lib/transrate/samtools.rb +88 -16
- data/lib/transrate/snap.rb +123 -0
- data/lib/transrate/transrater.rb +16 -19
- data/lib/transrate/version.rb +4 -4
- data/test/data/bridging_reads.l.fastq +20 -0
- data/test/data/bridging_reads.r.fastq +20 -0
- data/test/test_bin.rb +50 -21
- data/test/test_comp_metrics.rb +3 -27
- data/test/test_contig.rb +8 -0
- data/test/test_inline.rb +1 -1
- data/test/test_read_metrics.rb +108 -19
- data/test/test_transrater.rb +5 -5
- data/transrate.gemspec +2 -5
- metadata +66 -129
- data/lib/transrate/bowtie2.rb +0 -75
- data/lib/transrate/dimension_reduce.rb +0 -18
- data/lib/transrate/metric.rb +0 -16
- data/test/test_bowtie.rb +0 -66
data/lib/transrate/samtools.rb
CHANGED
@@ -1,20 +1,31 @@
|
|
1
|
-
require 'bio-samtools'
|
2
|
-
|
3
1
|
module Transrate
|
4
2
|
|
5
3
|
class Samtools
|
6
4
|
|
5
|
+
class SamtoolsError < StandardError; end
|
6
|
+
|
7
7
|
# Get the path to the samtools binary found on the PATH
|
8
8
|
# (located with `which samtools`; the result is cached)
|
9
9
|
# Locate the samtools executable by running `which samtools`.
# The first line of the lookup's output is memoised in @path, so the
# shell lookup only happens until a path has been found.
# Raises SamtoolsError when the lookup exits unsuccessfully.
def self.path
  return @path if @path

  finder = Cmd.new("which samtools")
  finder.run
  unless finder.status.success?
    raise SamtoolsError.new("could not find samtools in the path")
  end
  @path = finder.stdout.split("\n").first
end
|
13
20
|
|
14
21
|
# Run a samtools command
|
15
22
|
# Execute a samtools subcommand and hand back whatever it printed to stdout.
# +cmd+ is appended to the samtools path to form the full command line.
# An unsuccessful exit status is only logged as a warning (together with
# the captured stderr); stdout is returned either way.
def self.run cmd
  invocation = Cmd.new("#{Samtools.path} #{cmd}")
  invocation.run
  unless invocation.status.success?
    logger.warn "Samtools command failed: #{invocation}\n#{invocation.stderr}"
  end
  invocation.stdout
end
|
20
31
|
|
@@ -22,20 +33,45 @@ module Transrate
|
|
22
33
|
# Convert a SAM file to BAM using `view -bS`.
# The BAM file is named after the SAM file (".sam" swapped for ".bam") and
# resolved against the current working directory. If that file already
# exists the conversion is skipped.
# Returns the absolute path to the BAM file.
def self.sam_to_bam samfile
  bamfile = File.expand_path(File.basename(samfile, '.sam') + '.bam')
  unless File.exist?(bamfile)
    Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
  end
  bamfile
end
|
28
41
|
|
29
42
|
# Sort a bam file, returning the path to the sorted bamfile
|
30
|
-
def self.sort_bam bamfile
|
43
|
+
# Sort a BAM file, reusing an existing sorted file if one is present in
# the current working directory.
#
# bamfile - path to the BAM file to sort
# threads - number of sorting threads to ask samtools for (default 4)
#
# Returns the absolute path to the sorted BAM file.
def self.sort_bam bamfile, threads=4
  # the sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    cmd = "sort"
    # -@ is the thread count; -l (used previously) is the compression level
    cmd << " -@ #{threads}"
    cmd << " #{File.expand_path bamfile} #{sorted}"
    Samtools.run cmd
  end
  File.expand_path(sorted + '.bam')
end
|
56
|
+
|
57
|
+
# Sort a bam file by readname only, returning the path to the
|
58
|
+
# sorted bamfile
|
59
|
+
# Sort a BAM file by read name, reusing an existing sorted file if one is
# present in the current working directory.
#
# bamfile - path to the BAM file to sort
# threads - number of sorting threads to ask samtools for (default 4)
#
# Returns the absolute path to the sorted BAM file.
#
# NOTE(review): this uses the same ".sorted" output prefix as sort_bam, so
# a file left behind by sort_bam for the same input would be returned here
# without being re-sorted by name -- confirm callers never mix the two.
def self.readsort_bam bamfile, threads=4
  # the sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    cmd = "sort"
    # -@ is the thread count; -l (used previously) is the compression level
    cmd << " -@ #{threads}"
    cmd << " -n" # sort by read name only
    cmd << " #{File.expand_path bamfile} #{sorted}"
    Samtools.run cmd
  end
  File.expand_path(sorted + '.bam')
end
|
38
73
|
|
74
|
+
|
39
75
|
# Index a bamfile, returning the path to the index
|
40
76
|
def self.index_bam bamfile
|
41
77
|
index = File.basename(bamfile, '.bam') + '.bai'
|
@@ -57,17 +93,53 @@ module Transrate
|
|
57
93
|
# return the path to the coverage file
|
58
94
|
# Run `mpileup` for the given bam object and redirect its output into
# "<fasta basename>.coverage" in the current working directory.
# +bam+ must respond to #fasta (reference path) and #bam (BAM path).
# If the output file already exists, nothing is run.
# Returns the absolute path to the output file.
def self.coverage bam
  outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
  return outfile if File.exist?(outfile)

  args = [
    "mpileup",
    "-f #{File.expand_path bam.fasta}", # reference
    "-B",                               # don't calculate BAQ quality scores
    "-Q0",                              # include all reads ignoring quality
    "-I",                               # don't do genotype calculations
    File.expand_path(bam.bam),          # the bam file
    "> #{outfile}"
  ]
  Samtools.run args.join(" ")
  outfile
end
|
108
|
+
|
109
|
+
# Calculate per-base coverage and mapQ score from a sorted, indexed
|
110
|
+
# bam file. Return the path to the BCF file.
|
111
|
+
# Pipe `samtools mpileup` into `bcftools view -cg` and write the result to
# "<fasta basename>.bcf" in the current working directory.
#
# bam   - path to the BAM file
# fasta - path to the reference FASTA file
#
# If the output file already exists the pipeline is not run.
# Returns the absolute path to the output file.
# Raises RuntimeError when the pipeline exits unsuccessfully; any partially
# written output is removed first so that a later call does not mistake it
# for a finished result.
#
# NOTE(review): the command calls `samtools` by name rather than through
# Samtools.path, unlike the other methods -- confirm that is intended.
def self.bam_to_bcf(bam, fasta)
  outfile = File.expand_path "#{File.basename(fasta)}.bcf"
  unless File.exist?(outfile)
    cmd = "samtools mpileup"
    cmd << " -f #{File.expand_path fasta}" # reference
    cmd << " -B" # don't calculate BAQ quality scores
    cmd << " -q0" # include all multimapping reads
    cmd << " -Q0" # include all reads ignoring quality
    cmd << " -I" # don't do genotype calculations
    cmd << " -u" # output uncompressed bcf format
    cmd << " #{File.expand_path bam}" # the bam file
    cmd << " | bcftools view -cg - "
    cmd << " > #{outfile}"
    mpileup = Cmd.new cmd
    mpileup.run
    unless mpileup.status.success?
      # the shell redirect creates the file even when the pipeline fails
      File.delete(outfile) if File.exist?(outfile)
      raise RuntimeError.new("samtools and bcftools failed\n#{mpileup.stderr}")
    end
  end
  outfile
end
|
70
132
|
|
133
|
+
# Merge two BAM files with `merge`, passing +threads+ via the -@ option.
# +out+ is the output path, +left+ and +right+ are the inputs.
# Returns +out+ unchanged.
def self.merge_bam left, right, out, threads=1
  Samtools.run "merge -@ #{threads} #{out} #{left} #{right}"
  out
end
|
142
|
+
|
71
143
|
end
|
72
144
|
|
73
145
|
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Transrate

  # Raised when the snap binary cannot be found or a snap command fails.
  class SnapError < StandardError
  end

  # Wrapper around the `snap` command-line aligner: builds an index for a
  # sequence file and maps paired reads against it, producing a BAM file
  # and a cached read count.
  class Snap

    # NOTE(review): @sam is never assigned in this class, so #sam returns
    # nil; the alignment path is held in @bam and returned by #map_reads.
    attr_reader :index_name, :sam, :read_count

    # Locate the snap binary with `which snap`.
    # Raises SnapError if the lookup exits unsuccessfully.
    def initialize
      which_snap = Cmd.new('which snap')
      which_snap.run
      unless which_snap.status.success?
        raise SnapError.new("could not find snap in the path")
      end
      @snap = which_snap.stdout.split("\n").first

      @index_built = false
      @index_name = ""
    end

    # Build the `snap paired` command line.
    #
    # l, r    - comma-separated lists of left and right read files; the
    #           lists are paired up positionally
    # threads - value passed to snap's -t option
    #
    # Returns the command as a String.
    # Raises SnapError when the two lists do not contain the same number
    # of files (previously this silently produced a malformed command).
    def build_paired_cmd l, r, threads
      lefts = l.split(",")
      rights = r.split(",")
      if lefts.size != rights.size
        raise SnapError.new("left and right read file lists differ in length")
      end
      cmd = "#{@snap} paired #{@index_name}"
      lefts.zip(rights).each do |left, right|
        cmd << " #{left} #{right}"
      end
      # NOTE: do NOT turn on the -so flag (sort bam output)
      # it violates the basic assumption of eXpress's streaming
      # algorithm: that the fragments are observed in approximately
      # random order.
      cmd << " -o #{@bam}"
      cmd << " -s 0 1000" # min and max distance between paired-read starts
      cmd << " -H 300000" # max seed hits to consider in paired mode
      cmd << " -h 2000" # max seed hits to consider when reverting to single
      cmd << " -I" # ignore read IDs
      cmd << " -d 30" # max edit distance (function of read length?)
      cmd << " -t #{threads}"
      cmd << " -b" # bind threads to cores
      cmd << " -M" # format cigar string
      cmd << " -sa" # keep all alignments, don't discard 0x100
      # cmd << " -C++" # trim low-quality bases from front and back of reads
      cmd
    end

    # Map paired reads against the previously built index.
    #
    # left, right - comma-separated lists of read files
    # threads     - value passed to snap's -t option
    # file, insertsize, insertsd and outputname are accepted but not used
    # by this method.
    #
    # The BAM path is derived from the first left file, the first right
    # file and the index name, in the current working directory. When that
    # file already exists snap is not run and the read count is loaded
    # instead.
    #
    # Returns the absolute path to the BAM file.
    # Raises SnapError if the index has not been built or snap fails.
    def map_reads(file, left, right, insertsize: 200,
                  insertsd: 50, outputname: nil, threads: 8)
      raise SnapError.new("Index not built") if !@index_built

      lbase = File.basename(left.split(",").first)
      rbase = File.basename(right.split(",").first)
      index = File.basename(@index_name)
      @bam = File.expand_path("#{lbase}.#{rbase}.#{index}.bam")
      @read_count_file = "#{lbase}-#{rbase}-read_count.txt"

      if File.exist? @bam
        load_readcount left
      else
        snapcmd = build_paired_cmd(left, right, threads)
        runner = Cmd.new snapcmd
        runner.run
        # check the exit status before trusting anything snap printed
        unless runner.status.success?
          raise SnapError.new("Snap failed\n#{runner.stderr}")
        end
        save_readcount runner.stdout
      end
      @bam
    end

    # Scan snap's stdout for the line whose first two columns are "2000"
    # and "30" (presumably the echoed -h and -d settings -- verify against
    # snap's output format), take half of column 9 as the read count and
    # write it to the read count file.
    def save_readcount stdout
      stdout.split("\n").each do |line|
        cols = line.split(/\s+/)
        if cols[0] == "2000" && cols[1] == "30"
          @read_count = cols[8].to_i / 2
          File.open("#{@read_count_file}", "wb") do |out|
            out.write("#{@read_count}\n")
          end
        end
      end
    end

    # Set @read_count from the read count file if it exists; otherwise
    # count lines with `wc -l` in each file of the comma-separated +reads+
    # list (4 lines per read) and cache the total in the read count file.
    # Files that cannot be counted are logged and skipped.
    def load_readcount reads
      @read_count = 0
      if File.exist?("#{@read_count_file}")
        @read_count = File.read(@read_count_file).to_i
      else
        counted = false
        reads.split(",").each do |l|
          cmd = "wc -l #{l}"
          count = Cmd.new(cmd)
          count.run
          if count.status.success?
            @read_count += count.stdout.strip.split(/\s+/).first.to_i/4
            counted = true
          else
            logger.warn "couldn't get number of reads from #{l}"
          end
        end
        # write the cache once, and only if at least one file was counted
        if counted
          File.open("#{@read_count_file}", "wb") do |out|
            out.write("#{@read_count}\n")
          end
        end
      end
    end

    # Build a snap index for +file+ in a directory named after the file
    # (basename without extension) unless that directory already exists.
    # +threads+ is passed to snap's -t option.
    # Raises SnapError if the index command fails.
    def build_index file, threads
      @index_name = File.basename(file, File.extname(file))
      unless Dir.exist?(@index_name)
        cmd = "#{@snap} index #{file} #{@index_name}"
        cmd << " -s 23"
        cmd << " -t#{threads}"
        cmd << " -bSpace" # contig name terminates with space char
        runner = Cmd.new cmd
        runner.run
        if !runner.status.success?
          msg = "Failed to build Snap index\n#{runner.stderr}"
          raise SnapError.new(msg)
        end
      end
      @index_built = true
    end

  end # Snap

end # Transrate
|
data/lib/transrate/transrater.rb
CHANGED
@@ -22,10 +22,8 @@ module Transrate
|
|
22
22
|
# @param right [String] path to the right reads
|
23
23
|
# @param insertsize [Integer] mean insert size of the read pairs
|
24
24
|
# @param insertsd [Integer] standard deviation of the read pair insert size
|
25
|
-
def initialize(assembly, reference,
|
26
|
-
|
27
|
-
insertsize: nil, insertsd: nil,
|
28
|
-
threads: 1)
|
25
|
+
def initialize(assembly, reference, left: nil, right: nil,
|
26
|
+
insertsize: nil, insertsd: nil, threads: 1)
|
29
27
|
if assembly
|
30
28
|
if assembly.is_a?(Assembly)
|
31
29
|
@assembly = assembly
|
@@ -64,23 +62,22 @@ module Transrate
|
|
64
62
|
comparative_metrics
|
65
63
|
end
|
66
64
|
|
67
|
-
#
|
68
|
-
|
69
|
-
|
70
|
-
|
65
|
+
# Calculate the geometric mean of an array of numbers
|
66
|
+
# Geometric mean of the numbers in +x+, computed as the exponential of
# the mean of their natural logarithms.
def geomean(x)
  log_total = x.inject(0.0) { |acc, v| acc + Math.log(v) }
  Math.exp(log_total / x.size)
end
|
72
|
+
|
73
|
+
# Reduce all metrics for the assembly to a single quality score
|
74
|
+
# by taking the geometric mean of the scores for all contigs
|
75
|
+
# and multiplying it by the proportion of fragments whose most likely
|
76
|
+
# mapping is consistent with the assembly
|
71
77
|
# @return [Float] the assembly score
|
72
78
|
# Stores the geometric mean of the per-contig scores in @score and
# returns that value multiplied by @read_metrics.p_good_mapping.
def assembly_score
  contig_scores = assembly.assembly.values.map { |contig| contig.score }
  @score = geomean(contig_scores)
  @score * @read_metrics.p_good_mapping
end
|
85
82
|
|
86
83
|
def assembly_metrics
|
data/lib/transrate/version.rb
CHANGED
@@ -8,10 +8,10 @@ module Transrate
|
|
8
8
|
# the Gem. Versions must be incremented in accordance with
|
9
9
|
# Semantic Versioning 2.0 (http://semver.org/).
|
10
10
|
# Version components of the gem. STRING joins the non-nil components
# with dots, giving "1.0.0.alpha.1" for the values below.
module VERSION
  MAJOR = 1
  MINOR = 0
  PATCH = 0
  BUILD = 'alpha.1'.freeze

  # frozen so the published version string cannot be mutated in place
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
end
|
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
|
18
18
|
ACGACGCGCACCACATCAACAAGCACAACTTCCGTGTCCCCTTCGTCTGTGGCTGCCGTGACCTTGGTGAGGCGCTCCGGAGGGTCCGTGAGGGCGCCGC
|
19
19
|
+
|
20
20
|
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
21
|
+
@FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/1
|
22
|
+
GTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGCTAAGCGCATCGCCG
|
23
|
+
+
|
24
|
+
_[_eeeeegebe`efddgaeehhhhfgfQ^eb`adfgfffhZ_db\adb`aaabaWQT]`b_b]Y_b`cbbb`]`][]^]aaX^bbdbbY^[_a_[[O][
|
25
|
+
@FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/1
|
26
|
+
GGCCACGCCTGCTGATGCCGCGCTCATGATGCAGCTTGGGTGCGACGGCGTCTTCGTCGGCTC
|
27
|
+
+
|
28
|
+
bbbeeeeef`gcghcegdghfhfdfdfdghhfgfhhhhhgG_Z`__`c[_RZa^baX^aaZ_a
|
29
|
+
@FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/1
|
30
|
+
CTCATGTCTCTCCATCAGTAATAACGGGACTGAATCAATGGTAAGAAACAAAGCTATGGTACTATGCAGAACCTCTTTATTTTT
|
31
|
+
+
|
32
|
+
__^cc`\cecabcdea_d[`b^[abaUeW^a_e[S^afS^^Y^acbfedeb]^cacefh_V\W\efbddYcd`deV\aRV^ccc
|
33
|
+
@FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/1
|
34
|
+
TATGATTTGGTGATGCAGACCAAGCAGCTGGGCCGCCTCCCTGTTGTGCAGTTCGCGGCCGGGGGTGTGGCCACGCCTGCTGATGCCGCGCTCATGATGC
|
35
|
+
+
|
36
|
+
__[cccceggbeeddghhdhhhhhdhhfbfhhhhhhhhhhhhhghfefg_fdcgbdaabaaa_[]LEO^^^``[X_^^`ab`]]_bbac_X]XEYR_b]_
|
37
|
+
@FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/1
|
38
|
+
GTTCAGGCTGTCACCCACTACAGCGACCCTACCATCCTCGCCGACGTCAGCGCCGGACTCGGGGAGGCCATGGTCGGCATCAACCTCAACGACCCTA
|
39
|
+
+
|
40
|
+
b_beeeeeggggfiihhihihifhiiiiiiihfhefhiihiiiiigeedcdcca^cacccacccccc_acbbbS]][_[ab_`X`]`bcc[[X_a^b
|
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
|
18
18
|
GGACCGGCAACATCGTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGC
|
19
19
|
+
|
20
20
|
gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
|
21
|
+
@FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/2
|
22
|
+
CCCGGCCGCGAACTGCACAACAGGGAGGCGGCCCAGCTGCTTGGTCTGCATCACCAAATCATACGGCGCGGCGATGCGCT
|
23
|
+
+
|
24
|
+
___cccecec`eghfRc`efgfhaeefgiicccc^aX_]Y`bbcYbW`_XX]bbbbccccb`bbaa]_TTXTZTXT^[[_
|
25
|
+
@FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/2
|
26
|
+
TCGCTGTAGTGGGTGACAGCCTGAACGATGGCACGCGCGCGGCGAGCAGGGTCGCCACTCTTGAAGATTCCCGAGCCGACGAAG
|
27
|
+
+
|
28
|
+
___ea`e`cgegf^b`bbdgfffhdhhfhgheihfhihhiS\[RKWTL[`YX^_acccccccbcccbbc_abcR]]aXT[_]EQ
|
29
|
+
@FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/2
|
30
|
+
CATAAGCAGGGCTGGCAGAGCGACAGACATACAAACCGTTCAACATTTGATTGAGTACATGATAAACCAGGGGAGCCTAGTCTAACTTAATACAGTAGCT
|
31
|
+
+
|
32
|
+
^_accacagggabbfa]_f`Z_cgfYcZ_[aeggiihaeaeggghgdg_g__gghVb^``d`bdcaced^`__RW^^bbbcbcc]_]]b[]c]_`SY`YY
|
33
|
+
@FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/2
|
34
|
+
GTTGATGCCGACCATGGCCTCCCCGAGTCCGGCGCTGACGTCGGCGAGGATGG
|
35
|
+
+
|
36
|
+
_a_e`ccaeeegeafffhh[ffghd_cY^XZefff_d[`gGXKXW]_aaTX_^
|
37
|
+
@FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/2
|
38
|
+
GGCAGATGGATGGATGGGTGATACAAATATATGAGAGAAGATGACGATGGTGGAGCGGATAGGGTTGTTCACTCGGATCTGGCGGCGTATCGC
|
39
|
+
+
|
40
|
+
a_beeceegggggfghih`ffbgghhhffbffbfgfgfhdfdffghfghigegdgfhi_addeeZabbbbcbcbcaZ^ac[bbacXaTT^^a_
|
data/test/test_bin.rb
CHANGED
@@ -10,48 +10,47 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
10
10
|
end
|
11
11
|
|
12
12
|
# Remove every file the tests in this class may have left in the
# working directory, then the sorghum_transcript directory.
teardown do
  leftovers = %w[
    150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam.bai
    150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam
    sorghum_transcript_into_Os.protein.2.1.blast
    assembly.2_into_Os.protein.2.1.blast
    sorghum_transcript.nhr
    sorghum_transcript.nin
    sorghum_transcript.nsq
    Os.protein.2_into_sorghum_transcript.2.blast
    assembly.2.nhr
    assembly.2.nin
    assembly.2.nsq
    Os.protein.2.phr
    Os.protein.2.pin
    Os.protein.2.psq
    transrate_assemblies.csv
    params.xprs
    sorghum_transcript.fa_results.xprs
    sorghum_transcript.fa_bam_info.csv
    transrate_sorghum_transcript.fa_contigs.csv
    150uncovered.l.fq-150uncovered.r.fq-read_count.txt
    150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.merged.sorted.bam
  ]
  leftovers.each do |path|
    next unless File.exist?(path)
    File.delete(path)
  end
  `rm -rf sorghum_transcript`
end
|
31
32
|
|
32
33
|
# --help should print the usage text (containing DESCRIPTION) and exit
# successfully.
should "run help" do
  help = Transrate::Cmd.new("bundle exec bin/transrate --help")
  help.run
  assert help.stdout =~ /DESCRIPTION/
  assert_equal true, help.status.success?, "exit status"
end
|
38
39
|
|
39
40
|
# Pointing --assembly at a missing file must give an unsuccessful exit.
should "fail on non existent assembly files" do
  missing_assembly = Transrate::Cmd.new(
    "bundle exec bin/transrate --assembly foo.fasta"
  )
  missing_assembly.run
  assert_equal false, missing_assembly.status.success?, "exit success"
end
|
45
45
|
|
46
46
|
# Pointing --reference at a missing file must give an unsuccessful exit.
should "fail on non existent reference files" do
  missing_reference = Transrate::Cmd.new(
    "bundle exec bin/transrate --reference foo.fasta"
  )
  missing_reference.run
  assert_equal false, missing_reference.status.success?, "exit status"
end
|
52
51
|
|
53
52
|
should "run on test data" do
|
54
|
-
assembly = File.join(File.dirname(__FILE__), 'data', '
|
53
|
+
assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
|
55
54
|
reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
|
56
55
|
left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
|
57
56
|
right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
|
@@ -62,8 +61,9 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
62
61
|
c = Transrate::Cmd.new("#{cmd}")
|
63
62
|
c.run
|
64
63
|
assert_equal true, c.status.success?, "exit status"
|
65
|
-
assert File.exist?("transrate_assemblies.csv"), "csv file doesn't
|
66
|
-
assert File.exist?("
|
64
|
+
assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exist"
|
65
|
+
assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
|
66
|
+
"contig csv file doesn't exist"
|
67
67
|
hash = {}
|
68
68
|
CSV.foreach("transrate_assemblies.csv", :headers => true,
|
69
69
|
:header_converters => :symbol,
|
@@ -74,9 +74,38 @@ class TestTransrateBin < Test::Unit::TestCase
|
|
74
74
|
hash[header]=field
|
75
75
|
end
|
76
76
|
end
|
77
|
-
assert_equal
|
78
|
-
assert_equal
|
79
|
-
assert_equal
|
77
|
+
assert_equal 1555, hash[:n_bases], "number of bases"
|
78
|
+
assert_equal 823, hash[:n50], "n50"
|
79
|
+
assert_equal 0, hash[:n_refs_with_crbb], "number of crb hits"
|
80
|
+
end
|
81
|
+
|
82
|
+
# Run the executable with two left and two right read files given as
# comma-separated lists, then check the output CSVs exist and that the
# assembly summary reports the expected base count and n50.
should "run on test data with comma separated list of fastq files" do
  data_dir = File.join(File.dirname(__FILE__), 'data')
  assembly = File.join(data_dir, 'sorghum_transcript.fa')
  left = ['150uncovered.l.fq', 'bridging_reads.l.fastq'].map do |name|
    File.join(data_dir, name)
  end
  right = ['150uncovered.r.fq', 'bridging_reads.r.fastq'].map do |name|
    File.join(data_dir, name)
  end

  cmd = "bundle exec bin/transrate --assembly #{assembly}" \
        " --left #{left.join(",")}" \
        " --right #{right.join(",")}"
  c = Transrate::Cmd.new(cmd)
  c.run

  assert_equal true, c.status.success?, "exit status"
  assert File.exist?("transrate_assemblies.csv")
  assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
         "contig csv file doesn't exist"

  hash = {}
  CSV.foreach("transrate_assemblies.csv", :headers => true,
                                          :header_converters => :symbol,
                                          :converters => :all) do |row|
    row.headers.zip(row.fields) { |header, field| hash[header] = field }
  end
  assert_equal 1555, hash[:n_bases], "number of bases"
  assert_equal 823, hash[:n50], "n50"
end
|
81
110
|
|
82
111
|
should "fail when one of multiple assemblies is missing" do
|