transrate 0.3.1 → 1.0.0.alpha.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,20 +1,31 @@
1
- require 'bio-samtools'
2
-
3
1
  module Transrate
4
2
 
5
3
  class Samtools
6
4
 
5
+ class SamtoolsError < StandardError; end
6
+
7
7
  # Get the path to the samtools binary found on the PATH
8
8
  # (the result is cached after the first successful lookup)
9
9
  def self.path
10
- gem_path = Gem.loaded_specs['bio-samtools'].full_gem_path
11
- return File.join(gem_path, 'lib/bio/db/sam/external/samtools')
10
+ if !@path
11
+ which_samtools = Cmd.new("which samtools")
12
+ which_samtools.run
13
+ if !which_samtools.status.success?
14
+ raise SamtoolsError.new("could not find samtools in the path")
15
+ end
16
+ @path = which_samtools.stdout.split("\n").first
17
+ end
18
+ return @path
12
19
  end
13
20
 
14
21
  # Run a samtools command
15
22
  def self.run cmd
16
23
  runcmd = Cmd.new "#{Samtools.path} #{cmd}"
17
24
  runcmd.run
25
+ if !runcmd.status.success?
26
+ logger.warn "Samtools command failed: #{runcmd}" +
27
+ "\n#{runcmd.stderr}"
28
+ end
18
29
  runcmd.stdout
19
30
  end
20
31
 
@@ -22,20 +33,45 @@ module Transrate
22
33
  def self.sam_to_bam samfile
23
34
  bamfile = File.basename(samfile, '.sam') + '.bam'
24
35
  bamfile = File.expand_path bamfile
25
- Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
26
- File.expand_path bamfile
36
+ if !File.exist?(bamfile)
37
+ Samtools.run "view -bS #{File.expand_path samfile} > #{bamfile}"
38
+ end
39
+ bamfile
27
40
  end
28
41
 
29
42
  # Sort a bam file, returning the path to the sorted bamfile
30
- def self.sort_bam bamfile
43
def self.sort_bam bamfile, threads=4
  # Sort `bamfile` and return the absolute path of the sorted bam.
  #
  # bamfile - path to the bam file to sort
  # threads - number of threads samtools may use (default 4)
  #
  # The sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension.
  # If "<prefix>.bam" already exists in the working directory it is reused.
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    cmd = "sort"
    # -@ is the thread-count switch (as used by merge_bam); -l would set the
    # compression level instead.
    cmd << " -@ #{threads}"
    cmd << " #{File.expand_path bamfile} #{sorted}"
    Samtools.run cmd
  end
  File.expand_path(sorted + '.bam')
end
56
+
57
+ # Sort a bam file by readname only, returning the path to the
58
+ # sorted bamfile
59
def self.readsort_bam bamfile, threads=4
  # Sort `bamfile` by read name and return the absolute path of the result.
  #
  # bamfile - path to the bam file to sort
  # threads - number of threads samtools may use (default 4)
  #
  # The sort command behaves inconsistently with the other commands:
  # it takes an output prefix rather than a filename
  # and automatically adds the .bam extension.
  #
  # NOTE(review): the output prefix is the same "<name>.sorted" that sort_bam
  # uses, so an existing file produced by either method is reused here.
  sorted = File.basename(bamfile, '.bam') + '.sorted'
  unless File.exist?("#{sorted}.bam")
    cmd = "sort"
    # -@ is the thread-count switch (as used by merge_bam); -l would set the
    # compression level instead.
    cmd << " -@ #{threads}"
    cmd << " -n" # sort by read name only
    cmd << " #{File.expand_path bamfile} #{sorted}"
    Samtools.run cmd
  end
  File.expand_path(sorted + '.bam')
end
38
73
 
74
+
39
75
  # Index a bamfile, returning the path to the index
40
76
  def self.index_bam bamfile
41
77
  index = File.basename(bamfile, '.bam') + '.bai'
@@ -57,17 +93,53 @@ module Transrate
57
93
  # return the path to the coverage file
58
94
  def self.coverage bam
59
95
  outfile = File.expand_path "#{File.basename(bam.fasta)}.coverage"
60
- cmd = "mpileup"
61
- cmd += " -f #{File.expand_path bam.fasta}" # reference
62
- cmd += " -B" # don't calculate BAQ quality scores
63
- cmd += " -Q0" # include all reads ignoring quality
64
- cmd += " -I" # don't do genotype calculations
65
- cmd += " #{File.expand_path bam.bam}" # the bam file
66
- cmd += " > #{outfile}"
67
- Samtools.run cmd
96
+ if !File.exist?(outfile)
97
+ cmd = "mpileup"
98
+ cmd += " -f #{File.expand_path bam.fasta}" # reference
99
+ cmd += " -B" # don't calculate BAQ quality scores
100
+ cmd += " -Q0" # include all reads ignoring quality
101
+ cmd += " -I" # don't do genotype calculations
102
+ cmd += " #{File.expand_path bam.bam}" # the bam file
103
+ cmd += " > #{outfile}"
104
+ Samtools.run cmd
105
+ end
106
+ outfile
107
+ end
108
+
109
+ # Calculate per-base coverage and mapQ score from a sorted, indexed
110
+ # bam file. Return the path to the bcf file.
111
def self.bam_to_bcf(bam, fasta)
  # Pipe `samtools mpileup` into `bcftools view` and return the absolute path
  # of the resulting "<fasta basename>.bcf" file in the working directory.
  # An existing output file is reused without running anything.
  #
  # Raises RuntimeError when the shell pipeline reports failure.
  outfile = File.expand_path "#{File.basename(fasta)}.bcf"
  return outfile if File.exist?(outfile)

  pipeline = [
    "samtools mpileup",
    " -f #{File.expand_path fasta}", # reference
    " -B",                           # don't calculate BAQ quality scores
    " -q0",                          # include all multimapping reads
    " -Q0",                          # include all reads ignoring quality
    " -I",                           # don't do genotype calculations
    " -u",                           # output uncompressed bcf format
    " #{File.expand_path bam}",      # the bam file
    " | bcftools view -cg - ",
    " > #{outfile}"
  ].join
  mpileup = Cmd.new pipeline
  mpileup.run
  unless mpileup.status.success?
    raise RuntimeError.new("samtools and bcftools failed")
  end
  outfile
end
70
132
 
133
def self.merge_bam left, right, out, threads=1
  # Merge the bam files `left` and `right` into `out` using `threads`
  # samtools threads, and return `out`.
  Samtools.run "merge -@ #{threads} #{out} #{left} #{right}"
  out
end
142
+
71
143
  end
72
144
 
73
145
  end
@@ -0,0 +1,123 @@
1
module Transrate

  # Raised when the snap binary cannot be found, when a snap command fails,
  # or when reads are mapped before an index has been built.
  class SnapError < StandardError
  end

  # Wrapper around the `snap` command-line aligner: builds an index for an
  # assembly, maps paired reads against it and keeps track of the read count.
  #
  # NOTE(review): the `sam` reader is exposed but @sam is never assigned in
  # this class as shown.
  class Snap

    attr_reader :index_name, :sam, :read_count

    # Locate the snap binary with `which snap`.
    #
    # @raise [SnapError] if the lookup does not succeed
    def initialize
      which_snap = Cmd.new('which snap')
      which_snap.run
      if !which_snap.status.success?
        raise SnapError.new("could not find snap in the path")
      end
      @snap = which_snap.stdout.split("\n").first

      @index_built = false
      @index_name = ""
    end

    # Build the `snap paired` command line.
    #
    # @param l [String] comma-separated list of left read files
    # @param r [String] comma-separated list of right read files
    # @param threads [Integer] value passed to snap's -t option
    # @return [String] the full command line, writing to @bam
    def build_paired_cmd l, r, threads
      cmd = "#{@snap} paired #{@index_name}"
      # left and right files are paired up positionally
      l.split(",").zip(r.split(",")).each do |left, right|
        cmd << " #{left} #{right}"
      end
      # NOTE: do NOT turn on the -so flag (sort bam output)
      # it violates the basic assumption of eXpress's streaming
      # algorithm: that the fragments are observed in approximately
      # random order.
      cmd << " -o #{@bam}"
      cmd << " -s 0 1000" # min and max distance between paired-read starts
      cmd << " -H 300000" # max seed hits to consider in paired mode
      cmd << " -h 2000" # max seed hits to consider when reverting to single
      cmd << " -I" # ignore read IDs
      cmd << " -d 30" # max edit distance (function of read length?)
      cmd << " -t #{threads}"
      cmd << " -b" # bind threads to cores
      cmd << " -M" # format cigar string
      cmd << " -sa" # keep all alignments, don't discard 0x100
      # cmd << " -C++" # trim low-quality bases from front and back of reads
      cmd
    end

    # Map paired reads against the previously built index.
    #
    # The bam file is named "<left>.<right>.<index>.bam" (from the first file
    # of each list) in the working directory. If it already exists snap is
    # not run and only the read count is loaded.
    #
    # `file`, `insertsize`, `insertsd` and `outputname` are accepted but not
    # used by this method.
    #
    # @param left [String] comma-separated list of left read files
    # @param right [String] comma-separated list of right read files
    # @param threads [Integer] number of snap threads
    # @return [String] absolute path to the bam file
    # @raise [SnapError] if no index has been built or snap fails
    def map_reads(file, left, right, insertsize: 200,
                  insertsd: 50, outputname: nil, threads: 8)
      raise SnapError.new("Index not built") if !@index_built

      lbase = File.basename(left.split(",").first)
      rbase = File.basename(right.split(",").first)
      index = File.basename(@index_name)
      @bam = File.expand_path("#{lbase}.#{rbase}.#{index}.bam")
      @read_count_file = "#{lbase}-#{rbase}-read_count.txt"

      if File.exist?(@bam)
        load_readcount left
      else
        snapcmd = build_paired_cmd(left, right, threads)
        runner = Cmd.new snapcmd
        runner.run
        # check the exit status first so a failed run never persists a
        # read count that a later run would pick up
        unless runner.status.success?
          raise SnapError.new("Snap failed\n#{runner.stderr}")
        end
        save_readcount runner.stdout
      end
      @bam
    end

    # Extract the read count from snap's stdout and persist it.
    #
    # A line is used when its first two whitespace-separated columns are
    # "2000" and "30" (the values given to -h and -d in build_paired_cmd;
    # presumably snap's summary row - verify against snap's output format).
    # The ninth column of that line, halved, becomes the read count.
    #
    # @param stdout [String] captured stdout of the snap run
    def save_readcount stdout
      stdout.split("\n").each do |line|
        cols = line.split(/\s+/)
        if cols[0] == "2000" && cols[1] == "30"
          @read_count = cols[8].to_i / 2
          write_readcount
        end
      end
    end

    # Load the read count from the read-count file, or, when that file is
    # missing, derive it from the line counts of the given read files
    # (`wc -l` divided by 4) and write the file.
    #
    # @param reads [String] comma-separated list of read files
    def load_readcount reads
      @read_count = 0
      if File.exist?("#{@read_count_file}")
        # File.read closes the handle, unlike a bare File.open
        @read_count = File.read("#{@read_count_file}").to_i
      else
        counted = false
        reads.split(",").each do |l|
          count = Cmd.new("wc -l #{l}")
          count.run
          if count.status.success?
            @read_count += count.stdout.strip.split(/\s+/).first.to_i/4
            counted = true
          else
            logger.warn "couldn't get number of reads from #{l}"
          end
        end
        # one write with the final total instead of one per input file
        write_readcount if counted
      end
    end

    # Build a snap index for `file` unless a directory with the index name
    # (the file's basename without extension) already exists.
    #
    # @param file [String] path to the sequence file to index
    # @param threads [Integer] number of snap threads
    # @raise [SnapError] if the index command fails
    def build_index file, threads
      @index_name = File.basename(file, File.extname(file))
      unless Dir.exist?(@index_name)
        cmd = "#{@snap} index #{file} #{@index_name}"
        cmd << " -s 23"
        cmd << " -t#{threads}"
        cmd << " -bSpace" # contig name terminates with space char
        runner = Cmd.new cmd
        runner.run
        if !runner.status.success?
          msg = "Failed to build Snap index\n#{runner.stderr}"
          raise SnapError.new(msg)
        end
      end
      @index_built = true
    end

    private

    # Write the current read count, followed by a newline, to the
    # read-count file.
    def write_readcount
      File.open("#{@read_count_file}", "wb") do |out|
        out.write("#{@read_count}\n")
      end
    end

  end # Snap

end # Transrate
@@ -22,10 +22,8 @@ module Transrate
22
22
  # @param right [String] path to the right reads
23
23
  # @param insertsize [Integer] mean insert size of the read pairs
24
24
  # @param insertsd [Integer] standard deviation of the read pair insert size
25
- def initialize(assembly, reference,
26
- left: nil, right: nil,
27
- insertsize: nil, insertsd: nil,
28
- threads: 1)
25
+ def initialize(assembly, reference, left: nil, right: nil,
26
+ insertsize: nil, insertsd: nil, threads: 1)
29
27
  if assembly
30
28
  if assembly.is_a?(Assembly)
31
29
  @assembly = assembly
@@ -64,23 +62,22 @@ module Transrate
64
62
  comparative_metrics
65
63
  end
66
64
 
67
- # Reduce all metrics for the assembly to a single quality score.
68
- #
69
- #
70
- #
65
+ # Calculate the geometric mean of an array of numbers
66
def geomean(x)
  # Geometric mean of the numbers in `x`, computed as the exponential of the
  # mean of the logarithms.
  #
  # Returns a Float. An empty collection yields 0.0 rather than the NaN that
  # dividing the log-sum by a size of zero would produce. A zero element
  # makes the result 0.0 (Math.log(0) is -Infinity). Negative elements raise
  # Math::DomainError from Math.log.
  return 0.0 if x.size.zero?

  log_sum = x.inject(0.0) { |acc, v| acc + Math.log(v) }
  Math.exp(log_sum / x.size)
end
72
+
73
+ # Reduce all metrics for the assembly to a single quality score
74
+ # by taking the geometric mean of the scores for all contigs
75
+ # and multiplying it by the proportion of fragments whose most likely
76
+ # mapping is consistent with the assembly
71
77
  # @return [Float] the assembly score
72
78
  def assembly_score
73
- @score, pg, rc = nil
74
- if @read_metrics.has_run
75
- pg = Metric.new('pg', @read_metrics.pr_good_mapping, 0.0)
76
- end
77
- if @comparative_metrics.has_run
78
- rc = Metric.new('rc', @comparative_metrics.reference_coverage, 0.0)
79
- end
80
- if (pg && rc)
81
- @score = DimensionReduce.dimension_reduce([pg, rc])
82
- end
83
- return @score
79
+ @score = geomean assembly.assembly.values.map{ |contig| contig.score }
80
+ return @score * @read_metrics.p_good_mapping
84
81
  end
85
82
 
86
83
  def assembly_metrics
@@ -8,10 +8,10 @@ module Transrate
8
8
  # the Gem. Versions must be incremented in accordance with
9
9
  # Semantic Versioning 2.0 (http://semver.org/).
10
10
  module VERSION
11
- MAJOR = 0
12
- MINOR = 3
13
- PATCH = 1
14
- BUILD = nil
11
+ MAJOR = 1
12
+ MINOR = 0
13
+ PATCH = 0
14
+ BUILD = 'alpha.1'
15
15
 
16
16
  STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.')
17
17
  end
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
18
18
  ACGACGCGCACCACATCAACAAGCACAACTTCCGTGTCCCCTTCGTCTGTGGCTGCCGTGACCTTGGTGAGGCGCTCCGGAGGGTCCGTGAGGGCGCCGC
19
19
  +
20
20
  gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
21
+ @FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/1
22
+ GTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGCTAAGCGCATCGCCG
23
+ +
24
+ _[_eeeeegebe`efddgaeehhhhfgfQ^eb`adfgfffhZ_db\adb`aaabaWQT]`b_b]Y_b`cbbb`]`][]^]aaX^bbdbbY^[_a_[[O][
25
+ @FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/1
26
+ GGCCACGCCTGCTGATGCCGCGCTCATGATGCAGCTTGGGTGCGACGGCGTCTTCGTCGGCTC
27
+ +
28
+ bbbeeeeef`gcghcegdghfhfdfdfdghhfgfhhhhhgG_Z`__`c[_RZa^baX^aaZ_a
29
+ @FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/1
30
+ CTCATGTCTCTCCATCAGTAATAACGGGACTGAATCAATGGTAAGAAACAAAGCTATGGTACTATGCAGAACCTCTTTATTTTT
31
+ +
32
+ __^cc`\cecabcdea_d[`b^[abaUeW^a_e[S^afS^^Y^acbfedeb]^cacefh_V\W\efbddYcd`deV\aRV^ccc
33
+ @FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/1
34
+ TATGATTTGGTGATGCAGACCAAGCAGCTGGGCCGCCTCCCTGTTGTGCAGTTCGCGGCCGGGGGTGTGGCCACGCCTGCTGATGCCGCGCTCATGATGC
35
+ +
36
+ __[cccceggbeeddghhdhhhhhdhhfbfhhhhhhhhhhhhhghfefg_fdcgbdaabaaa_[]LEO^^^``[X_^^`ab`]]_bbac_X]XEYR_b]_
37
+ @FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/1
38
+ GTTCAGGCTGTCACCCACTACAGCGACCCTACCATCCTCGCCGACGTCAGCGCCGGACTCGGGGAGGCCATGGTCGGCATCAACCTCAACGACCCTA
39
+ +
40
+ b_beeeeeggggfiihhihihifhiiiiiiihfhefhiihiiiiigeedcdcca^cacccacccccc_acbbbS]][_[ab_`X`]`bcc[[X_a^b
@@ -18,3 +18,23 @@ gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
18
18
  GGACCGGCAACATCGTCGAGGCCGTCAGGCATGTGCGGTCCGTCATGGGCGATGTCCGTGCGCTCCGGAACATGGATGATGATGAGGTGTTCGCGTATGC
19
19
  +
20
20
  gggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggggg
21
+ @FCC2HFRACXX:7:1101:2015:1981#TGACCAAT/2
22
+ CCCGGCCGCGAACTGCACAACAGGGAGGCGGCCCAGCTGCTTGGTCTGCATCACCAAATCATACGGCGCGGCGATGCGCT
23
+ +
24
+ ___cccecec`eghfRc`efgfhaeefgiicccc^aX_]Y`bbcYbW`_XX]bbbbccccb`bbaa]_TTXTZTXT^[[_
25
+ @FCC2HFRACXX:7:1101:7968:3054#TGACCAAT/2
26
+ TCGCTGTAGTGGGTGACAGCCTGAACGATGGCACGCGCGCGGCGAGCAGGGTCGCCACTCTTGAAGATTCCCGAGCCGACGAAG
27
+ +
28
+ ___ea`e`cgegf^b`bbdgfffhdhhfhgheihfhihhiS\[RKWTL[`YX^_acccccccbcccbbc_abcR]]aXT[_]EQ
29
+ @FCC2HFRACXX:7:1101:18523:3712#TGACCAAT/2
30
+ CATAAGCAGGGCTGGCAGAGCGACAGACATACAAACCGTTCAACATTTGATTGAGTACATGATAAACCAGGGGAGCCTAGTCTAACTTAATACAGTAGCT
31
+ +
32
+ ^_accacagggabbfa]_f`Z_cgfYcZ_[aeggiihaeaeggghgdg_g__gghVb^``d`bdcaced^`__RW^^bbbcbcc]_]]b[]c]_`SY`YY
33
+ @FCC2HFRACXX:7:1101:6215:4551#TGACCAAT/2
34
+ GTTGATGCCGACCATGGCCTCCCCGAGTCCGGCGCTGACGTCGGCGAGGATGG
35
+ +
36
+ _a_e`ccaeeegeafffhh[ffghd_cY^XZefff_d[`gGXKXW]_aaTX_^
37
+ @FCC2HFRACXX:7:1101:9701:4567#TGACCAAT/2
38
+ GGCAGATGGATGGATGGGTGATACAAATATATGAGAGAAGATGACGATGGTGGAGCGGATAGGGTTGTTCACTCGGATCTGGCGGCGTATCGC
39
+ +
40
+ a_beeceegggggfghih`ffbgghhhffbffbfgfgfhdfdffghfghigegdgfhi_addeeZabbbbcbcbcaZ^ac[bbacXaTT^^a_
@@ -10,48 +10,47 @@ class TestTransrateBin < Test::Unit::TestCase
10
10
  end
11
11
 
12
12
  teardown do
13
- files = ["150uncovered.l.fq.150uncovered.r.fq.assembly.2.bai",
14
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.bam",
15
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.sam",
16
- "150uncovered.l.fq.150uncovered.r.fq.assembly.2.sorted.bam",
17
- "assembly.2.1.bt2", "assembly.2.2.bt2", "assembly.2.3.bt2",
18
- "assembly.2.4.bt2", "assembly.2.fa.coverage",
13
+ files = ["150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam.bai",
14
+ "150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.bam",
15
+ "sorghum_transcript_into_Os.protein.2.1.blast",
19
16
  "assembly.2_into_Os.protein.2.1.blast",
20
- "assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
21
- "assembly.2.rev.1.bt2", "assembly.2.rev.2.bt2",
22
- "Os.protein.2_into_assembly.2.2.blast",
17
+ "sorghum_transcript.nhr", "sorghum_transcript.nin", "sorghum_transcript.nsq",
18
+ "Os.protein.2_into_sorghum_transcript.2.blast",
19
+ "assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
23
20
  "Os.protein.2.phr", "Os.protein.2.pin", "Os.protein.2.psq",
24
- "supported_bridges.csv",
25
- "transrate_assemblies.csv",
26
- "transrate_contigs.csv"]
21
+ "transrate_assemblies.csv", "params.xprs",
22
+ "sorghum_transcript.fa_results.xprs",
23
+ "sorghum_transcript.fa_bam_info.csv",
24
+ "transrate_sorghum_transcript.fa_contigs.csv",
25
+ "150uncovered.l.fq-150uncovered.r.fq-read_count.txt",
26
+ "150uncovered.l.fq.150uncovered.r.fq.sorghum_transcript.merged.sorted.bam"]
27
27
  files.each do |file|
28
28
  File.delete(file) if File.exist?(file)
29
29
  end
30
+ `rm -rf sorghum_transcript`
30
31
  end
31
32
 
32
33
  should "run help" do
33
34
  c=Transrate::Cmd.new("bundle exec bin/transrate --help")
34
35
  c.run
35
- assert_equal 1751, c.stdout.length, "stdout"
36
+ assert c.stdout =~ /DESCRIPTION/
36
37
  assert_equal true, c.status.success?, "exit status"
37
38
  end
38
39
 
39
40
  should "fail on non existent assembly files" do
40
41
  c=Transrate::Cmd.new("bundle exec bin/transrate --assembly foo.fasta")
41
42
  c.run
42
- assert_equal 163, c.stderr.length, "stderr"
43
43
  assert_equal false, c.status.success?, "exit success"
44
44
  end
45
45
 
46
46
  should "fail on non existent reference files" do
47
47
  c=Transrate::Cmd.new("bundle exec bin/transrate --reference foo.fasta")
48
48
  c.run
49
- assert_equal 104, c.stderr.length, "error"
50
49
  assert_equal false, c.status.success?, "exit status"
51
50
  end
52
51
 
53
52
  should "run on test data" do
54
- assembly = File.join(File.dirname(__FILE__), 'data', 'assembly.2.fa')
53
+ assembly = File.join(File.dirname(__FILE__), 'data', 'sorghum_transcript.fa')
55
54
  reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
56
55
  left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
57
56
  right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
@@ -62,8 +61,9 @@ class TestTransrateBin < Test::Unit::TestCase
62
61
  c = Transrate::Cmd.new("#{cmd}")
63
62
  c.run
64
63
  assert_equal true, c.status.success?, "exit status"
65
- assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exit"
66
- assert File.exist?("transrate_contigs.csv"), "csv file doesn't exit"
64
+ assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exist"
65
+ assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
66
+ "contig csv file doesn't exist"
67
67
  hash = {}
68
68
  CSV.foreach("transrate_assemblies.csv", :headers => true,
69
69
  :header_converters => :symbol,
@@ -74,9 +74,38 @@ class TestTransrateBin < Test::Unit::TestCase
74
74
  hash[header]=field
75
75
  end
76
76
  end
77
- assert_equal 10331, hash[:n_bases], "number of bases"
78
- assert_equal 1566, hash[:n50], "n50"
79
- assert_equal 10, hash[:n_refs_with_crbb], "number of crb hits"
77
+ assert_equal 1555, hash[:n_bases], "number of bases"
78
+ assert_equal 823, hash[:n50], "n50"
79
+ assert_equal 0, hash[:n_refs_with_crbb], "number of crb hits"
80
+ end
81
+
82
should "run on test data with comma separated list of fastq files" do
  # Run transrate with two left and two right read files given as
  # comma-separated lists and check the summary csv it writes.
  data_dir = File.join(File.dirname(__FILE__), 'data')
  assembly = File.join(data_dir, 'sorghum_transcript.fa')
  left = [
    File.join(data_dir, '150uncovered.l.fq'),
    File.join(data_dir, 'bridging_reads.l.fastq')
  ]
  right = [
    File.join(data_dir, '150uncovered.r.fq'),
    File.join(data_dir, 'bridging_reads.r.fastq')
  ]
  cmd = "bundle exec bin/transrate --assembly #{assembly}" \
        " --left #{left.join(",")}" \
        " --right #{right.join(",")}"
  c = Transrate::Cmd.new("#{cmd}")
  c.run
  assert_equal true, c.status.success?, "exit status"
  assert File.exist?("transrate_assemblies.csv")
  assert File.exist?("transrate_sorghum_transcript.fa_contigs.csv"),
         "contig csv file doesn't exist"
  # Collapse the csv into header => value; a later row overwrites an earlier one.
  hash = {}
  CSV.foreach("transrate_assemblies.csv", :headers => true,
                                          :header_converters => :symbol,
                                          :converters => :all) do |row|
    row.headers.zip(row.fields).each do |header, field|
      hash[header] = field
    end
  end
  assert_equal 1555, hash[:n_bases], "number of bases"
  assert_equal 823, hash[:n50], "n50"
end
81
110
 
82
111
  should "fail when one of multiple assemblies is missing" do