transrate 0.3.1 → 1.0.0.alpha.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,20 +1,31 @@
1
- require 'bio-samtools'
2
-
3
1
  module Transrate
4
2
 
5
3
  class Samtools
6
4
 
5
+ class SamtoolsError < StandardError; end
6
+
7
7
  # Get the path to the samtools binary found on the PATH
8
8
  # (located with `which samtools` and cached after the first lookup)
9
9
  def self.path
10
- gem_path = Gem.loaded_specs['bio-samtools'].full_gem_path
11
- return File.join(gem_path, 'lib/bio/db/sam/external/samtools')
10
+ if !@path
11
+ which_samtools = Cmd.new("which samtools")
12
+ which_samtools.run
13
+ if !which_samtools.status.success?
14
+ raise SamtoolsError.new("could not find samtools in the path")
15
+ end
16
+ @path = which_samtools.stdout.split("\n").first
17
+ end
18
+ return @path
12
19
  end
13
20
 
14
21
  # Run a samtools command
15
22
  def self.run cmd
16
23
  runcmd = Cmd.new "#{Samtools.path} #{cmd}"
17
24
  runcmd.run
25
+ if !runcmd.status.success?
26
+ logger.warn "Samtools command failed: #{runcmd}" +
27
+ "\n#{runcmd.stderr}"
28
+ end
18
29
  runcmd.stdout
19
30
  end
20
31
 
@@ -22,20 +33,45 @@ module Transrate
22
33
  def self.sam_to_bam samfile
23
34
  bamfile = File.basename(samfile, '.sam') + '.bam'
24
35
  bamfile = File.expand_path bamfile
25
- Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
26
- File.expand_path bamfile
36
+ if !File.exist?(bamfile)
37
+ Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
38
+ end
39
+ bamfile
27
40
  end
28
41
 
29
42
  # Sort a bam file, returning the path to the sorted bamfile
30
- def self.sort_bam bamfile
43
+ def self.sort_bam bamfile, threads=4
44
+ # the sort command behaves inconsistently with the other commands:
45
+ # it takes an output prefix rather than a filename
46
+ # and automatically adds the .bam extension
47
+ sorted = File.basename(bamfile, '.bam') + '.sorted'
48
+ if !File.exist?("#{sorted}.bam")
49
+ cmd = "sort"
50
+ cmd << " -l #{threads}"
51
+ cmd << " #{File.expand_path bamfile} #{sorted}"
52
+ Samtools.run cmd
53
+ end
54
+ File.expand_path(sorted + '.bam')
55
+ end
56
+
57
+ # Sort a bam file by readname only, returning the path to the
58
+ # sorted bamfile
59
+ def self.readsort_bam bamfile, threads=4
31
60
  # the sort command behaves inconsistently with the other commands:
32
61
  # it takes an output prefix rather than a filename
33
62
  # and automatically adds the .bam extension
34
63
  sorted = File.basename(bamfile, '.bam') + '.sorted'
35
- Samtools.run "sort #{File.expand_path bamfile} #{sorted}"
64
+ if !File.exist?("#{sorted}.bam")
65
+ cmd = "sort"
66
+ cmd << " -l #{threads}"
67
+ cmd << " -n" # sort by read name only
68
+ cmd << " #{File.expand_path bamfile} #{sorted}"
69
+ Samtools.run cmd
70
+ end
36
71
  File.expand_path(sorted + '.bam')
37
72
  end
38
73
 
74
+
39
75
  # Index a bamfile, returning the path to the index
40
76
  def self.index_bam bamfile
41
77
  index = File.basename(bamfile, '.bam') + '.bai'
@@ -57,17 +93,53 @@ module Transrate
57
93
  # return the path to the coverage file
58
94
  def self.coverage bam
59
95
  outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
60
- cmd = "mpileup"
61
- cmd += " -f #{File.expand_path bam.fasta}" # reference
62
- cmd += " -B" # don't calculate BAQ quality scores
63
- cmd += " -Q0" # include all reads ignoring quality
64
- cmd += " -I" # don't do genotype calculations
65
- cmd += " #{File.expand_path bam.bam}" # the bam file
66
- cmd += " > #{outfile}"
67
- Samtools.run cmd
96
+ if !File.exist?(outfile)
97
+ cmd = "mpileup"
98
+ cmd += " -f #{File.expand_path bam.fasta}" # reference
99
+ cmd += " -B" # don't calculate BAQ quality scores
100
+ cmd += " -Q0" # include all reads ignoring quality
101
+ cmd += " -I" # don't do genotype calculations
102
+ cmd += " #{File.expand_path bam.bam}" # the bam file
103
+ cmd += " > #{outfile}"
104
+ Samtools.run cmd
105
+ end
106
+ outfile
107
+ end
108
+
109
+ # Calculate per-base coverage and mapQ score from a sorted, indexed
110
+ # bam file. Return the path to the bcf output file.
111
+ def self.bam_to_bcf(bam, fasta)
112
+ outfile = File.expand_path "#{File.basename(fasta)}.bcf"
113
+ if !File.exist?(outfile)
114
+ cmd = "samtools mpileup"
115
+ cmd << " -f #{File.expand_path fasta}" # reference
116
+ cmd << " -B" # don't calculate BAQ quality scores
117
+ cmd << " -q0" # include all multimapping reads
118
+ cmd << " -Q0" # include all reads ignoring quality
119
+ cmd << " -I" # don't do genotype calculations
120
+ cmd << " -u" # output uncompressed bcf format
121
+ cmd << " #{File.expand_path bam}" # the bam file
122
+ cmd << " | bcftools view -cg - "
123
+ cmd << " > #{outfile}"
124
+ mpileup = Cmd.new cmd
125
+ mpileup.run
126
+ if !mpileup.status.success?
127
+ raise RuntimeError.new("samtools and bcftools failed")
128
+ end
129
+ end
68
130
  outfile
69
131
  end
70
132
 
133
+ def self.merge_bam left, right, out, threads=1
134
+ cmd = "merge"
135
+ cmd << " -@ #{threads}"
136
+ cmd << " #{out}"
137
+ cmd << " #{left}"
138
+ cmd << " #{right}"
139
+ Samtools.run cmd
140
+ out
141
+ end
142
+
71
143
  end
72
144
 
73
145
  end
@@ -0,0 +1,123 @@
1
+ module Transrate
2
+
3
+ class SnapError < StandardError
4
+ end
5
+
6
+ class Snap
7
+
8
+ attr_reader :index_name, :sam, :read_count
9
+
10
+ def initialize
11
+ which_snap = Cmd.new('which snap')
12
+ which_snap.run
13
+ if !which_snap.status.success?
14
+ raise SnapError.new("could not find snap in the path")
15
+ end
16
+ @snap = which_snap.stdout.split("\n").first
17
+
18
+ @index_built = false
19
+ @index_name = ""
20
+ end
21
+
22
+ def build_paired_cmd l, r, threads
23
+ cmd = "#{@snap} paired #{@index_name}"
24
+ l.split(",").zip(r.split(",")).each do |left, right|
25
+ cmd << " #{left} #{right}"
26
+ end
27
+ # NOTE: do NOT turn on the -so flag (sort bam output)
28
+ # it violates the basic assumption of eXpress's streaming
29
+ # algorithm: that the fragments are observed in approximately
30
+ # random order.
31
+ cmd << " -o #{@bam}"
32
+ cmd << " -s 0 1000" # min and max distance between paired-read starts
33
+ cmd << " -H 300000" # max seed hits to consider in paired mode
34
+ cmd << " -h 2000" # max seed hits to consider when reverting to single
35
+ cmd << " -I" # ignore read IDs
36
+ cmd << " -d 30" # max edit distance (function of read length?)
37
+ cmd << " -t #{threads}"
38
+ cmd << " -b" # bind threads to cores
39
+ cmd << " -M" # format cigar string
40
+ cmd << " -sa" # keep all alignments, don't discard 0x100
41
+ # cmd << " -C++" # trim low-quality bases from front and back of reads
42
+ cmd
43
+ end
44
+
45
+ def map_reads(file, left, right, insertsize: 200,
46
+ insertsd: 50, outputname: nil, threads: 8)
47
+ raise SnapError.new("Index not built") if !@index_built
48
+
49
+ lbase = File.basename(left.split(",").first)
50
+ rbase = File.basename(right.split(",").first)
51
+ index = File.basename(@index_name)
52
+ @bam = File.expand_path("#{lbase}.#{rbase}.#{index}.bam")
53
+ @read_count_file = "#{lbase}-#{rbase}-read_count.txt"
54
+
55
+ unless File.exists? @bam
56
+ snapcmd = build_paired_cmd(left, right, threads)
57
+ runner = Cmd.new snapcmd
58
+ runner.run
59
+ save_readcount runner.stdout
60
+ unless runner.status.success?
61
+ raise SnapError.new("Snap failed\n#{runner.stderr}")
62
+ end
63
+ else
64
+ load_readcount left
65
+ end
66
+ @bam
67
+ end
68
+
69
+ def save_readcount stdout
70
+ stdout.split("\n").each do |line|
71
+ cols = line.split(/\s+/)
72
+ if cols[0]=="2000" and cols[1]=="30"
73
+ @read_count = cols[8].to_i / 2
74
+ File.open("#{@read_count_file}", "wb") do |out|
75
+ out.write("#{@read_count}\n")
76
+ end
77
+ end
78
+ end
79
+ end
80
+
81
+ def load_readcount reads
82
+ @read_count = 0
83
+ if File.exist?("#{@read_count_file}")
84
+ @read_count = File.open("#{@read_count_file}").readlines.join.to_i
85
+ else
86
+ reads.split(",").each do |l|
87
+ cmd = "wc -l #{l}"
88
+ count = Cmd.new(cmd)
89
+ count.run
90
+ if count.status.success?
91
+ @read_count += count.stdout.strip.split(/\s+/).first.to_i/4
92
+ File.open("#{@read_count_file}", "wb") do |out|
93
+ out.write("#{@read_count}\n")
94
+ end
95
+ else
96
+ logger.warn "couldn't get number of reads from #{l}"
97
+ end
98
+ end
99
+ end
100
+ end
101
+
102
+ def build_index file, threads
103
+ @index_name = File.basename(file, File.extname(file))
104
+ unless Dir.exists?(@index_name)
105
+ overflow = 500
106
+ cmd = "#{@snap} index #{file} #{@index_name}"
107
+ cmd << " -s 23"
108
+ cmd << " -t#{threads}"
109
+ cmd << " -bSpace" # contig name terminates with space char
110
+ runner = Cmd.new cmd
111
+ runner.run
112
+ if !runner.status.success?
113
+ err = runner.stderr
114
+ msg = "Failed to build Snap index\n#{runner.stderr}"
115
+ raise SnapError.new(msg)
116
+ end
117
+ end
118
+ @index_built = true
119
+ end
120
+
121
+ end # Snap
122
+
123
+ end # Transrate
@@ -22,10 +22,8 @@ module Transrate
22
22
  # @param right [String] path to the right reads
23
23
  # @param insertsize [Integer] mean insert size of the read pairs
24
24
  # @param insertsd [Integer] standard deviation of the read pair insert size
25
- def initialize(assembly, reference,
26
- left: nil, right: nil,
27
- insertsize: nil, insertsd: nil,
28
- threads: 1)
25
+ def initialize(assembly, reference, left: nil, right: nil,
26
+ insertsize: nil, insertsd: nil, threads: 1)
29
27
  if assembly
30
28
  if assembly.is_a?(Assembly)
31
29
  @assembly = assembly
@@ -64,23 +62,22 @@ module Transrate
64
62
  comparative_metrics
65
63
  end
66
64
 
67
- # Reduce all metrics for the assembly to a single quality score.
68
- #
69
- #
70
- #
65
+ # Calculate the geometric mean of an array of numbers
66
+ def geomean(x)
67
+ sum = 0.0
68
+ x.each{ |v| sum += Math.log(v) }
69
+ sum /= x.size
70
+ Math.exp(sum)
71
+ end
72
+
73
+ # Reduce all metrics for the assembly to a single quality score
74
+ # by taking the geometric mean of the scores for all contigs
75
+ # and multiplying it by the proportion of fragments whose most likely
76
+ # mapping is consistent with the assembly
71
77
  # @return [Integer] the assembly score
72
78
  def assembly_score
73
- @score, pg, rc = nil
74
- if @read_metrics.has_run
75
- pg = Metric.new('pg', @read_metrics.pr_good_mapping, 0.0)
76
- end
77
- if @comparative_metrics.has_run
78
- rc = Metric.new('rc', @comparative_metrics.reference_coverage, 0.0)
79
- end
80
- if (pg && rc)
81
- @score = DimensionReduce.dimension_reduce([pg, rc])
82
- end
83
- return @score
79
+ @score = geomean assembly.assembly.values.map{ |contig| contig.score }
80
+ return @score * @read_metrics.p_good_mapping
84
81
  end
85
82
 
86
83
  def assembly_metrics
@@ -8,10 +8,10 @@ module Transrate
8
8
  # the Gem. Versions must be incremented in accordance with
9
9
  # Semantic Versioning 2.0 (http://semver.org/).
10
10
  module VERSION
11
- MAJOR = 0
12
- MINOR = 3
13
- PATCH = 1
14
- BUILD = nil
11
+ MAJOR = 1
12
+ MINOR = 0
13
+ PATCH = 0
14
+ BUILD = 'alpha.1'
15
15
 
16
16
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
17
17
  end
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
18
18
  ACGACGCGCACCACATCAACAAGCACAACTTCCGTGTCCCCTTCGTCTGTGGCTGCCGTGACCTTGGTGAGGCGCTCCGGAGGGTCCGTGAGGGCGCCGC
19
19
  +
20
20
  gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
21
+ @FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/1
22
+ GTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGCTAAGCGCATCGCCG
23
+ +
24
+ _[_eeeeegebe`efddgaeehhhhfgfQ^eb`adfgfffhZ_db\adb`aaabaWQT]`b_b]Y_b`cbbb`]`][]^]aaX^bbdbbY^[_a_[[O][
25
+ @FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/1
26
+ GGCCACGCCTGCTGATGCCGCGCTCATGATGCAGCTTGGGTGCGACGGCGTCTTCGTCGGCTC
27
+ +
28
+ bbbeeeeef`gcghcegdghfhfdfdfdghhfgfhhhhhgG_Z`__`c[_RZa^baX^aaZ_a
29
+ @FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/1
30
+ CTCATGTCTCTCCATCAGTAATAACGGGACTGAATCAATGGTAAGAAACAAAGCTATGGTACTATGCAGAACCTCTTTATTTTT
31
+ +
32
+ __^cc`\cecabcdea_d[`b^[abaUeW^a_e[S^afS^^Y^acbfedeb]^cacefh_V\W\efbddYcd`deV\aRV^ccc
33
+ @FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/1
34
+ TATGATTTGGTGATGCAGACCAAGCAGCTGGGCCGCCTCCCTGTTGTGCAGTTCGCGGCCGGGGGTGTGGCCACGCCTGCTGATGCCGCGCTCATGATGC
35
+ +
36
+ __[cccceggbeeddghhdhhhhhdhhfbfhhhhhhhhhhhhhghfefg_fdcgbdaabaaa_[]LEO^^^``[X_^^`ab`]]_bbac_X]XEYR_b]_
37
+ @FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/1
38
+ GTTCAGGCTGTCACCCACTACAGCGACCCTACCATCCTCGCCGACGTCAGCGCCGGACTCGGGGAGGCCATGGTCGGCATCAACCTCAACGACCCTA
39
+ +
40
+ b_beeeeeggggfiihhihihifhiiiiiiihfhefhiihiiiiigeedcdcca^cacccacccccc_acbbbS]][_[ab_`X`]`bcc[[X_a^b
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
18
18
  GGACCGGCAACATCGTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGC
19
19
  +
20
20
  gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
21
+ @FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/2
22
+ CCCGGCCGCGAACTGCACAACAGGGAGGCGGCCCAGCTGCTTGGTCTGCATCACCAAATCATACGGCGCGGCGATGCGCT
23
+ +
24
+ ___cccecec`eghfRc`efgfhaeefgiicccc^aX_]Y`bbcYbW`_XX]bbbbccccb`bbaa]_TTXTZTXT^[[_
25
+ @FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/2
26
+ TCGCTGTAGTGGGTGACAGCCTGAACGATGGCACGCGCGCGGCGAGCAGGGTCGCCACTCTTGAAGATTCCCGAGCCGACGAAG
27
+ +
28
+ ___ea`e`cgegf^b`bbdgfffhdhhfhgheihfhihhiS\[RKWTL[`YX^_acccccccbcccbbc_abcR]]aXT[_]EQ
29
+ @FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/2
30
+ CATAAGCAGGGCTGGCAGAGCGACAGACATACAAACCGTTCAACATTTGATTGAGTACATGATAAACCAGGGGAGCCTAGTCTAACTTAATACAGTAGCT
31
+ +
32
+ ^_accacagggabbfa]_f`Z_cgfYcZ_[aeggiihaeaeggghgdg_g__gghVb^``d`bdcaced^`__RW^^bbbcbcc]_]]b[]c]_`SY`YY
33
+ @FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/2
34
+ GTTGATGCCGACCATGGCCTCCCCGAGTCCGGCGCTGACGTCGGCGAGGATGG
35
+ +
36
+ _a_e`ccaeeegeafffhh[ffghd_cY^XZefff_d[`gGXKXW]_aaTX_^
37
+ @FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/2
38
+ GGCAGATGGATGGATGGGTGATACAAATATATGAGAGAAGATGACGATGGTGGAGCGGATAGGGTTGTTCACTCGGATCTGGCGGCGTATCGC
39
+ +
40
+ a_beeceegggggfghih`ffbgghhhffbffbfgfgfhdfdffghfghigegdgfhi_addeeZabbbbcbcbcaZ^ac[bbacXaTT^^a_
@@ -10,48 +10,47 @@ class TestTransrateBin < Test::Unit::TestCase
10
10
  end
11
11
 
12
12
  teardown do
13
- files = ["150uncovered.l.fq.150uncovered.r.fq.assembly.2.bai",
14
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.bam",
15
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.sam",
16
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.sorted.bam",
17
- "assembly.2.1.bt2", "assembly.2.2.bt2", "assembly.2.3.bt2",
18
- "assembly.2.4.bt2", "assembly.2.fa.coverage",
13
+ files = ["150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam.bai",
14
+ "150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam",
15
+ "sorghum_transcript_into_Os.protein.2.1.blast",
19
16
  "assembly.2_into_Os.protein.2.1.blast",
20
- "assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
21
- "assembly.2.rev.1.bt2", "assembly.2.rev.2.bt2",
22
- "Os.protein.2_into_assembly.2.2.blast",
17
+ "sorghum_transcript.nhr", "sorghum_transcript.nin", "sorghum_transcript.nsq",
18
+ "Os.protein.2_into_sorghum_transcript.2.blast",
19
+ "assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
23
20
  "Os.protein.2.phr", "Os.protein.2.pin", "Os.protein.2.psq",
24
- "supported_bridges.csv",
25
- "transrate_assemblies.csv",
26
- "transrate_contigs.csv"]
21
+ "transrate_assemblies.csv", "params.xprs",
22
+ "sorghum_transcript.fa_results.xprs",
23
+ "sorghum_transcript.fa_bam_info.csv",
24
+ "transrate_sorghum_transcript.fa_contigs.csv",
25
+ "150uncovered.l.fq-150uncovered.r.fq-read_count.txt",
26
+ "150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.merged.sorted.bam"]
27
27
  files.each do |file|
28
28
  File.delete(file) if File.exist?(file)
29
29
  end
30
+ `rm -rf sorghum_transcript`
30
31
  end
31
32
 
32
33
  should "run help" do
33
34
  c=Transrate::Cmd.new("bundle exec bin/transrate --help")
34
35
  c.run
35
- assert_equal 1751, c.stdout.length, "stdout"
36
+ assert c.stdout =~ /DESCRIPTION/
36
37
  assert_equal true, c.status.success?, "exit status"
37
38
  end
38
39
 
39
40
  should "fail on non existent assembly files" do
40
41
  c=Transrate::Cmd.new("bundle exec bin/transrate --assembly foo.fasta")
41
42
  c.run
42
- assert_equal 163, c.stderr.length, "stderr"
43
43
  assert_equal false, c.status.success?, "exit success"
44
44
  end
45
45
 
46
46
  should "fail on non existent reference files" do
47
47
  c=Transrate::Cmd.new("bundle exec bin/transrate --reference foo.fasta")
48
48
  c.run
49
- assert_equal 104, c.stderr.length, "error"
50
49
  assert_equal false, c.status.success?, "exit status"
51
50
  end
52
51
 
53
52
  should "run on test data" do
54
- assembly = File.join(File.dirname(__FILE__), 'data', 'assembly.2.fa')
53
+ assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
55
54
  reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
56
55
  left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
57
56
  right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
@@ -62,8 +61,9 @@ class TestTransrateBin < Test::Unit::TestCase
62
61
  c = Transrate::Cmd.new("#{cmd}")
63
62
  c.run
64
63
  assert_equal true, c.status.success?, "exit status"
65
- assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exit"
66
- assert File.exist?("transrate_contigs.csv"), "csv file doesn't exit"
64
+ assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exist"
65
+ assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
66
+ "contig csv file doesn't exist"
67
67
  hash = {}
68
68
  CSV.foreach("transrate_assemblies.csv", :headers => true,
69
69
  :header_converters => :symbol,
@@ -74,9 +74,38 @@ class TestTransrateBin < Test::Unit::TestCase
74
74
  hash[header]=field
75
75
  end
76
76
  end
77
- assert_equal 10331, hash[:n_bases], "number of bases"
78
- assert_equal 1566, hash[:n50], "n50"
79
- assert_equal 10, hash[:n_refs_with_crbb], "number of crb hits"
77
+ assert_equal 1555, hash[:n_bases], "number of bases"
78
+ assert_equal 823, hash[:n50], "n50"
79
+ assert_equal 0, hash[:n_refs_with_crbb], "number of crb hits"
80
+ end
81
+
82
+ should "run on test data with comma separated list of fastq files" do
83
+ assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
84
+ left = []
85
+ right = []
86
+ left << File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
87
+ left << File.join(File.dirname(__FILE__), 'data', 'bridging_reads.l.fastq')
88
+ right << File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
89
+ right << File.join(File.dirname(__FILE__), 'data', 'bridging_reads.r.fastq')
90
+ cmd = "bundle exec bin/transrate --assembly #{assembly}"
91
+ cmd << " --left #{left.join(",")}"
92
+ cmd << " --right #{right.join(",")}"
93
+ c = Transrate::Cmd.new("#{cmd}")
94
+ c.run
95
+ assert_equal true, c.status.success?, "exit status"
96
+ assert File.exist?("transrate_assemblies.csv")
97
+ assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
98
+ "contig csv file doesn't exist"
99
+ hash = {}
100
+ CSV.foreach("transrate_assemblies.csv", :headers => true,
101
+ :header_converters => :symbol,
102
+ :converters => :all) do |row|
103
+ row.headers.zip(row.fields).each do |header, field|
104
+ hash[header]=field
105
+ end
106
+ end
107
+ assert_equal 1555, hash[:n_bases], "number of bases"
108
+ assert_equal 823, hash[:n50], "n50"
80
109
  end
81
110
 
82
111
  should "fail when one of multiple assemblies is missing" do