transrate 1.0.0.beta1 → 1.0.0.beta2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 015defaf8abd6b99791790ba1d59b016345c78b7
4
- data.tar.gz: ea71a0e84c35c4fbc002314f8730530671c5b38c
3
+ metadata.gz: b90d296192b895be2cb4f5411ccc716264f5f360
4
+ data.tar.gz: 54fa1d66571912a1526aa7bd0d9ed549a04c8b19
5
5
  SHA512:
6
- metadata.gz: 1c27bdcb6cdc2bf1c855fd8d30e1b834a09e1557974880b7f18a06f0eca7cd383504237a2f299bfa90e1f52bcbe3395b9b263d021c3ebc3bd02f092f6df9115d
7
- data.tar.gz: bdcbe15c1dcd33233aada0b55955c8ad6d7cf1cc73f54893ea131f2e7f445b6447d9d2f47de8a1b82786413db275966d3e7db80d65e84a020020d2c22b74a3f1
6
+ metadata.gz: dc43e7f297ce2d53a03f9a7fcdde87124c605750cb6947d35e32185f5de92f53785fab37302e3abde3e4a2f7736aee3680fb415ab413d914720c025ebbff5f87
7
+ data.tar.gz: a122304dae2f6009c22e9d396c2b2f874cfa47937e582b1b5e5342cdf2a6d7eaeddcca465b3571022f7979d385f1c4014541a1ac1c5ca241eacaa8e8a4fd84e6
data/.gitignore CHANGED
@@ -19,6 +19,7 @@ tmp
19
19
  \#*
20
20
  *so
21
21
  dryrun
22
+ .DS_Store
22
23
 
23
24
  # YARD artifacts
24
25
  .yardoc
data/.travis.yml CHANGED
@@ -2,8 +2,16 @@ language: ruby
2
2
  rvm:
3
3
  - "2.0.0"
4
4
  - "2.1.0"
5
+ - "2.2.0"
5
6
  before_script:
6
7
  - gem install bindeps
7
8
  - bundle install
8
9
  - bundle exec rake compile
9
10
  - bundle exec bin/transrate --install-deps
11
+ os:
12
+ - linux
13
+ - osx
14
+ matrix:
15
+ allow_failures:
16
+ - os: osx
17
+ - rvm: "2.2.0"
data/CITATION ADDED
@@ -0,0 +1,3 @@
1
+ To cite transrate in publications, please use:
2
+
3
+ Smith-Unna, Richard D.; Boursnell, Chris M.; Hibberd, Julian M. and Kelly, Steven (2014). Transrate: v1.0.0 beta 1. Github: https://github.com/Blahah/transrate. 10.5281/zenodo.13161
data/README.md CHANGED
@@ -24,7 +24,7 @@ This software is being actively developed. Please be aware that there may be bug
24
24
 
25
25
  ## Citation
26
26
 
27
- Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.png)](http://dx.doi.org/10.5281/zenodo.11039).
27
+ Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.13161.svg)](http://dx.doi.org/10.5281/zenodo.13161).
28
28
 
29
29
  ## Documentation
30
30
 
data/Rakefile CHANGED
@@ -57,5 +57,76 @@ Rake::TestTask.new do |t|
57
57
  t.test_files = ['test/test_contig.rb']
58
58
  end
59
59
 
60
+ Rake::TestTask.new do |t|
61
+ t.name = :assembly
62
+ t.libs << 'test'
63
+ t.test_files = ['test/test_assembly.rb']
64
+ end
65
+
66
+ Rake::TestTask.new do |t|
67
+ t.name = :snap
68
+ t.libs << 'test'
69
+ t.test_files = ['test/test_snap.rb']
70
+ end
71
+
72
+ Rake::TestTask.new do |t|
73
+ t.name = :salmon
74
+ t.libs << 'test'
75
+ t.test_files = ['test/test_salmon.rb']
76
+ end
77
+
78
+
60
79
  desc "Run tests"
61
80
  task :default => :test
81
+
82
+ # PACKAGING
83
+
84
+ PACKAGE_NAME = "transrate"
85
+ VERSION = "1.0.0.beta2"
86
+ TRAVELING_RUBY_VERSION = "20141215-2.1.5"
87
+
88
+ desc "Package your app"
89
+ task :package => ['package:linux:x86_64', 'package:osx']
90
+
91
+ namespace :package do
92
+ namespace :linux do
93
+ desc "Package your app for Linux x86_64"
94
+ task :x86_64 => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
95
+ create_package("linux-x86_64")
96
+ end
97
+ end
98
+
99
+ desc "Package your app for OS X"
100
+ task :osx => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
101
+ create_package("osx")
102
+ end
103
+ end
104
+
105
+ file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
106
+ download_runtime("linux-x86_64")
107
+ end
108
+
109
+ file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
110
+ download_runtime("osx")
111
+ end
112
+
113
+ def create_package(target)
114
+ package_dir = "packaging/#{PACKAGE_NAME}-#{VERSION}-#{target}"
115
+ sh "rm -rf #{package_dir}"
116
+ sh "mkdir -p #{package_dir}/lib/app"
117
+ sh "cp -r lib #{package_dir}/lib/app/"
118
+ sh "cp -r bin #{package_dir}/lib/app/"
119
+ sh "mkdir #{package_dir}/lib/ruby"
120
+ sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/ruby"
121
+ sh "cp packaging/transrate #{package_dir}/transrate"
122
+ if !ENV['DIR_ONLY']
123
+ sh "tar -czf #{package_dir}.tar.gz #{package_dir}"
124
+ sh "rm -rf #{package_dir}"
125
+ end
126
+ end
127
+
128
+ def download_runtime(target)
129
+ sh "mkdir -p packaging/packaging &&" +
130
+ "cd packaging/packaging && curl -L -O --fail " +
131
+ "http://d6r77u77i8pq3.cloudfront.net/releases/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz"
132
+ end
data/bin/transrate CHANGED
@@ -1,14 +1,32 @@
1
1
  #!/usr/bin/env ruby
2
- PROFILE = false
3
2
 
4
3
  require 'trollop'
5
4
  require 'transrate'
6
5
  require 'csv'
7
6
  require 'bindeps'
8
- require 'ruby-prof'
9
7
 
8
+ include Transrate
9
+
10
+ # Show the help message if no arguments provided
10
11
  ARGV[0] = "--help" if ARGV.length() == 0
11
12
 
13
+ # We want clean error messages through the logger, no ugly backtraces
14
+ # because the user doesn't care about them, unless they specifically ask for
15
+ # them with --loglevel debug
16
+ module Kernel
17
+ alias _raise raise
18
+
19
+ def raise(*a)
20
+ begin
21
+ _raise(*a)
22
+ rescue TransrateError => e
23
+ logger.error e.message
24
+ logger.debug e.backtrace unless e.backtrace.nil?
25
+ exit 1
26
+ end
27
+ end
28
+ end
29
+
12
30
  opts = Trollop::options do
13
31
  version Transrate::VERSION::STRING.dup
14
32
  banner <<-EOS
@@ -19,8 +37,8 @@ opts = Trollop::options do
19
37
  DESCRIPTION:
20
38
  Analyse a de-novo transcriptome assembly using three kinds of metrics:
21
39
 
22
- 1. contig-based
23
- 2. read-mapping (if --left and --right are provided)
40
+ 1. sequence-based (basic)
41
+ 2. read-mapping-based (if --left and --right are provided)
24
42
  3. reference-based (if --reference is provided)
25
43
 
26
44
  Bug reports and feature requests at:
@@ -34,10 +52,10 @@ opts = Trollop::options do
34
52
  transrate --install-deps
35
53
  # get the transrate score for the assembly and each contig
36
54
  transrate --assembly contigs.fa --left left.fq --right right.fq
37
- # contig metrics only
55
+ # basic assembly metrics only
38
56
  transrate --assembly contigs.fa
39
- # contig and reference-based metrics with 8 threads
40
- transrate --assembly contigs.fa --reference Athaliana_protein.fa --threads 8
57
+ # basic and reference-based metrics with 8 threads
58
+ transrate --assembly contigs.fa --reference Athaliana_transcripts.fa --threads 8
41
59
  # contig and read-based metrics for two assemblies with 32 threads
42
60
  transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
43
61
 
@@ -61,61 +79,109 @@ opts = Trollop::options do
61
79
  "one of [error, info, warn, debug]",
62
80
  :default => 'info'
63
81
  opt :install_deps, "install any missing dependencies"
82
+ opt :install_read_deps, "install missing dependencies for read metrics only"
83
+ opt :install_ref_deps, "install missing dependencies for reference metrics only"
64
84
  end
85
+
86
+ # Check dependencies if they are relevant to the command issued,
87
+ # and handle any commands to install missing ones
65
88
  gem_dir = Gem.loaded_specs['transrate'].full_gem_path
66
89
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
67
- if opts.install_deps
90
+ blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
91
+
92
+ if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
93
+ # user has requested dependency installation
68
94
  puts "Checking dependencies"
69
- Bindeps.require gem_deps
95
+
96
+ missing = []
97
+ if opts.install_deps || opts.install_read_deps
98
+ Bindeps.require gem_deps
99
+ missing += Bindeps.missing gem_deps
100
+ end
101
+
102
+ if opts.install_deps || opts.install_ref_deps
103
+ Bindeps.require blast_dep
104
+ missing += Bindeps.missing blast_dep
105
+ end
106
+
107
+ unless missing.empty?
108
+ raise TransrateError.new "Failed to install: \n - #{missing.join('\n - ')}"
109
+ end
110
+
70
111
  puts "All dependencies installed"
71
112
  exit
113
+
72
114
  else
73
- missing = Bindeps.missing gem_deps
74
- if missing.length > 0
115
+ # no dependency installation requested, but check dependencies
116
+ # for the commands provided are installed
117
+ missing = []
118
+ missing = Bindeps.missing gem_deps if opts.left
119
+ blast_missing = []
120
+ blast_missing = Bindeps.missing blast_dep if opts.reference
121
+
122
+ if missing.length + blast_missing.length > 0
75
123
  puts "Dependencies are missing:"
124
+
76
125
  missing.each do |dep|
77
126
  puts " - #{dep.name} (#{dep.version})"
78
127
  end
79
- puts "To install all missing dependencies, run `transrate --install-deps`"
80
- exit(1)
128
+
129
+ blast_missing.each do |dep|
130
+ puts " - #{dep.name} (#{dep.version})"
131
+ end
132
+
133
+ puts "To install all missing dependencies, run:"
134
+ puts " transrate --install-deps"
135
+ puts "If you only want the read-metrics dependencies:"
136
+ puts " transrate --install-read-deps"
137
+ puts "Or if you only want the reference-metrics dependencies: "
138
+ puts " transrate --install-ref-deps"
139
+
140
+ exit 1
81
141
  end
142
+
143
+ end
144
+
145
+ # Handle commands
146
+ unless %w[error info warn debug].include? opts.loglevel
147
+ raise "Loglevel #{opts.loglevel} is not valid. " +
148
+ "It must be one of: error, info, warn, debug."
82
149
  end
83
150
 
151
+ logger.level = Yell::Level.new opts.loglevel.to_sym
152
+
84
153
  if opts.assembly
85
154
  opts.assembly.split(',').each do |assembly_file|
86
155
  unless File.exist?(assembly_file)
87
- raise IOError.new "Assembly fasta file does not exist: #{assembly_file}"
156
+ raise TransrateIOError.new "Assembly fasta file does not exist: " +
157
+ " #{assembly_file}"
88
158
  end
89
159
  end
90
160
  else
91
- raise ArgumentError.new "Option --assembly must be specified. " +
92
- "Try --help for help."
161
+ raise TransrateArgError.new "Option --assembly must be specified. " +
162
+ "Try --help for help."
93
163
  end
94
164
 
95
165
  if opts.reference && !File.exist?(opts.reference)
96
- raise IOError.new "Reference fasta file does not exist: #{opts.reference}"
166
+ raise TransrateIOError.new "Reference fasta file does not exist: " +
167
+ " #{opts.reference}"
97
168
  end
98
169
 
99
170
  if opts.left and opts.right
100
171
  if opts.left.split(",").length != opts.right.split(",").length
101
172
  msg = "Please provide the same number of left reads as right reads"
102
- raise ArgumentError.new(msg)
173
+ raise TransrateArgError.new msg
103
174
  end
104
175
  opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
105
176
  if !File.exist?(left)
106
- raise IOError.new "Left read fastq file does not exist: #{left}"
177
+ raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
107
178
  end
108
179
  if !File.exist?(right)
109
- raise IOError.new "Right read fastq file does not exist: #{right}"
180
+ raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
110
181
  end
111
182
  end
112
183
  end
113
184
 
114
- if PROFILE
115
- logger.info "Starting profiler"
116
- RubyProf.start
117
- end
118
-
119
185
  def pretty_print_hash hash, width
120
186
  hash.map do |k, v|
121
187
  # show as float if there are any decimal places
@@ -133,15 +199,6 @@ def pretty_print_hash hash, width
133
199
  end
134
200
  end
135
201
 
136
- include Transrate
137
-
138
- unless %w[error info warn debug].include? opts.loglevel
139
- raise "Loglevel #{opts.loglevel} is not valid. " +
140
- "It must be one of: error, info, warn, debug."
141
- end
142
-
143
- logger.level = Yell::Level.new opts.loglevel.to_sym
144
-
145
202
  r = opts.reference ? Assembly.new(opts.reference) : nil
146
203
  report_width = 35
147
204
 
@@ -251,6 +308,7 @@ opts.assembly.split(',').each do |assembly|
251
308
  all << contig_results.merge(read_results)
252
309
  .merge(comparative_results)
253
310
  .merge({ :assembly => assembly })
311
+ .merge({ :score => score })
254
312
 
255
313
  end
256
314
 
@@ -266,10 +324,3 @@ CSV.open(outfile, 'wb') do |file|
266
324
  file << head.map { |x| row[x] }
267
325
  end
268
326
  end
269
-
270
- if PROFILE
271
- logger.info "Writing profiling results to transrate_profile.txt"
272
- result = RubyProf.stop
273
- printer = RubyProf::FlatPrinter.new(result)
274
- printer.print(File.open('transrate_profile.txt', 'w'))
275
- end
data/deps/blast.yaml ADDED
@@ -0,0 +1,27 @@
1
+ blastplus:
2
+ binaries:
3
+ - makeblastdb
4
+ - blastn
5
+ - tblastn
6
+ - blastp
7
+ - blastx
8
+ - tblastx
9
+ - makembindex
10
+ - psiblast
11
+ - rpsblast
12
+ - blastdbcmd
13
+ - segmasker
14
+ - dustmasker
15
+ - blast_formatter
16
+ - windowmasker
17
+ - blastdb_aliastool
18
+ - deltablast
19
+ - rpstblastn
20
+ - blastdbcheck
21
+ version:
22
+ number: '2.2.[0-9]'
23
+ command: 'blastx -version'
24
+ url:
25
+ 64bit:
26
+ macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
27
+ linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
data/deps/deps.yaml CHANGED
@@ -1,80 +1,54 @@
1
- blastplus:
2
- binaries:
3
- - makeblastdb
4
- - blastn
5
- - tblastn
6
- - blastp
7
- - blastx
8
- - tblastx
9
- - makembindex
10
- - psiblast
11
- - rpsblast
12
- - blastdbcmd
13
- - segmasker
14
- - dustmasker
15
- - blast_formatter
16
- - windowmasker
17
- - blastdb_aliastool
18
- - deltablast
19
- - rpstblastn
20
- - blastdbcheck
21
- version:
22
- number: '2.2.29'
23
- command: 'blastx -version'
24
- url:
25
- 64bit:
26
- macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
27
- linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
28
1
  snap:
29
2
  binaries:
30
3
  - snap
31
4
  version:
32
- number: '1.0dev.63'
5
+ number: '1.0dev.67.trfix1'
33
6
  command: 'snap'
34
7
  url:
35
8
  64bit:
36
- linux: https://github.com/HibberdLab/snap/raw/dev/bin/linux/snap.tar.gz
37
- macosx: https://github.com/HibberdLab/snap/raw/dev/bin/macosx/snap.tar.gz
38
- samtools:
39
- binaries:
40
- - samtools
41
- - bcftools
42
- version:
43
- number: '0.1.19'
44
- command: 'samtools'
45
- url:
46
- 64bit:
47
- linux: https://github.com/cboursnell/samtools/raw/master/build/linux64.tar.gz
48
- macosx: https://github.com/cboursnell/samtools/raw/master/build/osx64.tar.gz
9
+ linux: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_linux.tar.gz
10
+ macosx: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_macosx.tar.gz
49
11
  bam-read:
50
12
  binaries:
51
13
  - bam-read
52
14
  version:
53
- number: '1.0.0.beta1'
15
+ number: '1.0.0.beta3'
54
16
  command: 'bam-read'
55
17
  url:
56
18
  64bit:
57
- linux: https://github.com/cboursnell/transrate-tools/raw/master/bin/linux/bam-read
58
- macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-read
59
- unpack: false
60
- bam-split:
61
- binaries:
62
- - bam-split
63
- version:
64
- number: '1.0.0.beta1'
65
- command: 'bam-split'
66
- url:
67
- 64bit:
68
- linux: https://github.com/Blahah/transrate-tools/raw/master/bin/linux/bam-split
69
- macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-split
70
- unpack: false
71
- express:
19
+ linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_linux.tar.gz
20
+ macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_macosx.tar.gz
21
+ unpack: true
22
+ salmon:
72
23
  binaries:
73
- - express
24
+ - salmon
25
+ libraries:
26
+ - libbz2.so.1
27
+ - libgcc_s.so.1
28
+ - libgomp.so.1
29
+ - liblzma.so.0
30
+ - libm.so.6
31
+ - libpthread.so.0
32
+ - librt.so.1
33
+ - libstdc++.so.6
34
+ - libtbbmalloc_proxy.so,
35
+ - libtbbmalloc_proxy.so.2
36
+ - libtbbmalloc.so
37
+ - libtbbmalloc.so.2
38
+ - libtbb.so
39
+ - libtbb.so.2
40
+ - libz.so.1
41
+ - libcmph.0.dylib
42
+ - libcmph.dylib
43
+ - libcmph.la
44
+ - libstaden-read.la
45
+ - libtbb.dylib
46
+ - libtbbmalloc.dylib
47
+ - libtbbmalloc_proxy.dylib
74
48
  version:
75
- number: '1.5.1'
76
- command: 'express --version'
49
+ number: 'Salmon v0.2.7'
50
+ command: 'salmon --help'
77
51
  url:
78
52
  64bit:
79
- linux: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz
80
- macosx: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz
53
+ linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_Ubuntu-12.04.tar.gz
54
+ macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_MacOSX-10.10.1.tar.gz
@@ -55,6 +55,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
55
55
  }
56
56
  for (i=0; i < len; i++) {
57
57
  base = seq[i];
58
+ if (base > 90) {
59
+ base -= 32;
60
+ }
58
61
  switch (base) {
59
62
  case 'A': {
60
63
  idx=0;
@@ -81,6 +84,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
81
84
 
82
85
  if (i > 0) {
83
86
  prevbase = seq[i-1];
87
+ if (prevbase > 90) {
88
+ prevbase -= 32;
89
+ }
84
90
  switch (prevbase) {
85
91
  case 'A': {
86
92
  idx=idx;
@@ -138,6 +144,9 @@ VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
138
144
  n = 0;
139
145
  for(i = start; i < start+k; i++) {
140
146
  base = c_str[i];
147
+ if (base > 90) {
148
+ base -= 32;
149
+ }
141
150
  switch (base) {
142
151
  case 'A': {
143
152
  h = h << 2;
@@ -4,6 +4,8 @@ require 'forwardable'
4
4
 
5
5
  module Transrate
6
6
 
7
+ class AssemblyError < TransrateError; end
8
+
7
9
  # Container for a transcriptome assembly and its associated
8
10
  # metadata.
9
11
  #
@@ -41,13 +43,20 @@ module Transrate
41
43
  def initialize file
42
44
  @file = File.expand_path file
43
45
  unless File.exist? @file
44
- raise IOError.new "Assembly file doesn't exist: #{@file}"
46
+ raise TransrateIOError.new "Assembly file doesn't exist: #{@file}"
45
47
  end
46
48
  @assembly = {}
47
49
  @n_bases = 0
48
50
  Bio::FastaFormat.open(file).each do |entry|
49
51
  @n_bases += entry.length
50
52
  contig = Contig.new(entry)
53
+ if @assembly.key?(contig.name)
54
+ logger.error "Non unique fasta identifier found"
55
+ logger.error ">#{contig.name}"
56
+ logger.error "Please make sure there are no duplicate entries in the assembly"
57
+ logger.error "Contig name is taken from before the first | or space"
58
+ raise AssemblyError
59
+ end
51
60
  @assembly[contig.name] = contig
52
61
  end
53
62
  @contig_metrics = ContigMetrics.new self
@@ -125,7 +134,6 @@ module Transrate
125
134
  # and iterate over them
126
135
  bin.sort_by! { |c| c.seq.length }
127
136
  bin.each do |contig|
128
-
129
137
  # increment our long contig counters if this
130
138
  # contig is above the thresholds
131
139
  if contig.length < 200
@@ -157,7 +165,6 @@ module Transrate
157
165
  cutoff = x2.pop / 100.0
158
166
  end
159
167
  end
160
-
161
168
  end
162
169
 
163
170
  # if there aren't enough sequences we might have no value for some
@@ -168,6 +175,11 @@ module Transrate
168
175
 
169
176
  # calculate and return the statistics as a hash
170
177
  mean = cumulative_length / @assembly.size
178
+ if @assembly.size * mean == 0
179
+ mean_orf_percent = 0
180
+ else
181
+ mean_orf_percent = 300 * orf_length_sum / (@assembly.size * mean)
182
+ end
171
183
  ns = Hash[x.map { |n| "n#{n}" }.zip(res)]
172
184
  {
173
185
  'n_seqs' => bin.size,
@@ -179,7 +191,7 @@ module Transrate
179
191
  'n_over_1k' => n_over_1k,
180
192
  'n_over_10k' => n_over_10k,
181
193
  'n_with_orf' => n_with_orf,
182
- 'mean_orf_percent' => 300 * orf_length_sum / (@assembly.size * mean)
194
+ 'mean_orf_percent' => mean_orf_percent
183
195
  }.merge ns
184
196
 
185
197
  end # basic_bin_stats
@@ -187,20 +199,18 @@ module Transrate
187
199
  def classify_contigs
188
200
  # create hash of file handles for each output
189
201
  base = File.basename @file
190
- files = Hash.new do
191
- %w(good fragmented chimeric bad).each do |type|
192
- handle = File.open("#{type}.#{base}", "wb")
193
- [type.to_sym, handle]
194
- end
202
+ files = {}
203
+ %w(good fragmented chimeric bad).each do |type|
204
+ files[type.to_sym] = File.open("#{type}.#{base}", "wb")
195
205
  end
196
206
  # loop through contigs writing them out to the appropriate file
197
207
  @assembly.each_pair do |name, contig|
198
208
  category = contig.classify
199
209
  handle = files[category]
200
- handle.push contig.to_fasta
210
+ handle.write contig.to_fasta
201
211
  end
202
212
  # close all the file handles
203
- files.each do |handle|
213
+ files.each do |type, handle|
204
214
  handle.close
205
215
  end
206
216
  end
@@ -55,12 +55,12 @@ module Transrate
55
55
  crbblast.reciprocals.each do |key, list|
56
56
  list.each_with_index do |hit, i|
57
57
  unless @reference.assembly.key? hit.target
58
- raise "#{hit.target} not in reference"
58
+ raise TransrateError.new "#{hit.target} not in reference"
59
59
  end
60
60
  @reference[hit.target].hits << hit
61
61
 
62
62
  unless @assembly.assembly.key? hit.query
63
- raise "#{hit.query} not in assembly"
63
+ raise TransrateError.new "#{hit.query} not in assembly"
64
64
  end
65
65
  contig = @assembly[hit.query]
66
66
  contig.has_crb = true
@@ -20,7 +20,12 @@ module Transrate
20
20
  attr_accessor :hits
21
21
 
22
22
  def initialize(seq, name: nil)
23
- seq.seq.gsub!("\0", "") # there is probably a better fix than this
23
+ # fix null bytes in the nucleotide sequence
24
+ seq.seq.gsub!("\0", "")
25
+ # trim trailing semicolons (because BLAST strips them)
26
+ if seq.respond_to?(:entry_id)
27
+ seq.entry_id.gsub!(/;$/, '')
28
+ end
24
29
  @seq = seq
25
30
  @seq.data = nil # no need to store raw fasta string
26
31
  @name = seq.respond_to?(:entry_id) ? seq.entry_id : name
@@ -248,13 +253,13 @@ module Transrate
248
253
  return :good if score >= 0.5
249
254
  # fragmented?
250
255
  if in_bridges > 5
251
- if p_not_segmented * p_bases_covered * p_seq_true * p_unique >= 0.5
256
+ if p_not_segmented * p_bases_covered * p_seq_true >= 0.5
252
257
  return :fragmented
253
258
  end
254
259
  end
255
260
  # chimeric?
256
261
  if p_not_segmented < 0.25
257
- if p_good * p_bases_covered * p_seq_true * p_unique >= 0.5
262
+ if p_good * p_bases_covered * p_seq_true >= 0.5
258
263
  return :chimeric
259
264
  end
260
265
  end