transrate 1.0.0.beta1 → 1.0.0.beta2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 015defaf8abd6b99791790ba1d59b016345c78b7
4
- data.tar.gz: ea71a0e84c35c4fbc002314f8730530671c5b38c
3
+ metadata.gz: b90d296192b895be2cb4f5411ccc716264f5f360
4
+ data.tar.gz: 54fa1d66571912a1526aa7bd0d9ed549a04c8b19
5
5
  SHA512:
6
- metadata.gz: 1c27bdcb6cdc2bf1c855fd8d30e1b834a09e1557974880b7f18a06f0eca7cd383504237a2f299bfa90e1f52bcbe3395b9b263d021c3ebc3bd02f092f6df9115d
7
- data.tar.gz: bdcbe15c1dcd33233aada0b55955c8ad6d7cf1cc73f54893ea131f2e7f445b6447d9d2f47de8a1b82786413db275966d3e7db80d65e84a020020d2c22b74a3f1
6
+ metadata.gz: dc43e7f297ce2d53a03f9a7fcdde87124c605750cb6947d35e32185f5de92f53785fab37302e3abde3e4a2f7736aee3680fb415ab413d914720c025ebbff5f87
7
+ data.tar.gz: a122304dae2f6009c22e9d396c2b2f874cfa47937e582b1b5e5342cdf2a6d7eaeddcca465b3571022f7979d385f1c4014541a1ac1c5ca241eacaa8e8a4fd84e6
data/.gitignore CHANGED
@@ -19,6 +19,7 @@ tmp
19
19
  \#*
20
20
  *so
21
21
  dryrun
22
+ .DS_Store
22
23
 
23
24
  # YARD artifacts
24
25
  .yardoc
data/.travis.yml CHANGED
@@ -2,8 +2,16 @@ language: ruby
2
2
  rvm:
3
3
  - "2.0.0"
4
4
  - "2.1.0"
5
+ - "2.2.0"
5
6
  before_script:
6
7
  - gem install bindeps
7
8
  - bundle install
8
9
  - bundle exec rake compile
9
10
  - bundle exec bin/transrate --install-deps
11
+ os:
12
+ - linux
13
+ - osx
14
+ matrix:
15
+ allow_failures:
16
+ - os: osx
17
+ - rvm: "2.2.0"
data/CITATION ADDED
@@ -0,0 +1,3 @@
1
+ To cite transrate in publications, please use:
2
+
3
+ Smith-Unna, Richard D.; Boursnell, Chris M.; Hibberd, Julian M. and Kelly, Steven (2014). Transrate: v1.0.0 beta 1. Github: https://github.com/Blahah/transrate. 10.5281/zenodo.13161
data/README.md CHANGED
@@ -24,7 +24,7 @@ This software is being actively developed. Please be aware that there may be bug
24
24
 
25
25
  ## Citation
26
26
 
27
- Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.png)](http://dx.doi.org/10.5281/zenodo.11039).
27
+ Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/doi/10.5281/zenodo.13161.svg)](http://dx.doi.org/10.5281/zenodo.13161).
28
28
 
29
29
  ## Documentation
30
30
 
data/Rakefile CHANGED
@@ -57,5 +57,76 @@ Rake::TestTask.new do |t|
57
57
  t.test_files = ['test/test_contig.rb']
58
58
  end
59
59
 
60
+ Rake::TestTask.new do |t|
61
+ t.name = :assembly
62
+ t.libs << 'test'
63
+ t.test_files = ['test/test_assembly.rb']
64
+ end
65
+
66
+ Rake::TestTask.new do |t|
67
+ t.name = :snap
68
+ t.libs << 'test'
69
+ t.test_files = ['test/test_snap.rb']
70
+ end
71
+
72
+ Rake::TestTask.new do |t|
73
+ t.name = :salmon
74
+ t.libs << 'test'
75
+ t.test_files = ['test/test_salmon.rb']
76
+ end
77
+
78
+
60
79
  desc "Run tests"
61
80
  task :default => :test
81
+
82
+ # PACKAGING
83
+
84
+ PACKAGE_NAME = "transrate"
85
+ VERSION = "1.0.0.beta2"
86
+ TRAVELING_RUBY_VERSION = "20141215-2.1.5"
87
+
88
+ desc "Package your app"
89
+ task :package => ['package:linux:x86_64', 'package:osx']
90
+
91
+ namespace :package do
92
+ namespace :linux do
93
+ desc "Package your app for Linux x86_64"
94
+ task :x86_64 => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
95
+ create_package("linux-x86_64")
96
+ end
97
+ end
98
+
99
+ desc "Package your app for OS X"
100
+ task :osx => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
101
+ create_package("osx")
102
+ end
103
+ end
104
+
105
+ file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
106
+ download_runtime("linux-x86_64")
107
+ end
108
+
109
+ file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
110
+ download_runtime("osx")
111
+ end
112
+
113
+ def create_package(target)
114
+ package_dir = "packaging/#{PACKAGE_NAME}-#{VERSION}-#{target}"
115
+ sh "rm -rf #{package_dir}"
116
+ sh "mkdir -p #{package_dir}/lib/app"
117
+ sh "cp -r lib #{package_dir}/lib/app/"
118
+ sh "cp -r bin #{package_dir}/lib/app/"
119
+ sh "mkdir #{package_dir}/lib/ruby"
120
+ sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/ruby"
121
+ sh "cp packaging/transrate #{package_dir}/transrate"
122
+ if !ENV['DIR_ONLY']
123
+ sh "tar -czf #{package_dir}.tar.gz #{package_dir}"
124
+ sh "rm -rf #{package_dir}"
125
+ end
126
+ end
127
+
128
+ def download_runtime(target)
129
+ sh "mkdir -p packaging/packaging &&" +
130
+ "cd packaging/packaging && curl -L -O --fail " +
131
+ "http://d6r77u77i8pq3.cloudfront.net/releases/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz"
132
+ end
data/bin/transrate CHANGED
@@ -1,14 +1,32 @@
1
1
  #!/usr/bin/env ruby
2
- PROFILE = false
3
2
 
4
3
  require 'trollop'
5
4
  require 'transrate'
6
5
  require 'csv'
7
6
  require 'bindeps'
8
- require 'ruby-prof'
9
7
 
8
+ include Transrate
9
+
10
+ # Show the help message if no arguments provided
10
11
  ARGV[0] = "--help" if ARGV.length() == 0
11
12
 
13
+ # We want clean error messages through the logger, no ugly backtraces
14
+ # because the user doesn't care about them, unless they specifically ask for
15
+ # them with --loglevel debug
16
+ module Kernel
17
+ alias _raise raise
18
+
19
+ def raise(*a)
20
+ begin
21
+ _raise(*a)
22
+ rescue TransrateError => e
23
+ logger.error e.message
24
+ logger.debug e.backtrace unless e.backtrace.nil?
25
+ exit 1
26
+ end
27
+ end
28
+ end
29
+
12
30
  opts = Trollop::options do
13
31
  version Transrate::VERSION::STRING.dup
14
32
  banner <<-EOS
@@ -19,8 +37,8 @@ opts = Trollop::options do
19
37
  DESCRIPTION:
20
38
  Analyse a de-novo transcriptome assembly using three kinds of metrics:
21
39
 
22
- 1. contig-based
23
- 2. read-mapping (if --left and --right are provided)
40
+ 1. sequence-based (basic)
41
+ 2. read-mapping-based (if --left and --right are provided)
24
42
  3. reference-based (if --reference is provided)
25
43
 
26
44
  Bug reports and feature requests at:
@@ -34,10 +52,10 @@ opts = Trollop::options do
34
52
  transrate --install-deps
35
53
  # get the transrate score for the assembly and each contig
36
54
  transrate --assembly contigs.fa --left left.fq --right right.fq
37
- # contig metrics only
55
+ # basic assembly metrics only
38
56
  transrate --assembly contigs.fa
39
- # contig and reference-based metrics with 8 threads
40
- transrate --assembly contigs.fa --reference Athaliana_protein.fa --threads 8
57
+ # basic and reference-based metrics with 8 threads
58
+ transrate --assembly contigs.fa --reference Athaliana_transcripts.fa --threads 8
41
59
  # contig and read-based metrics for two assemblies with 32 threads
42
60
  transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
43
61
 
@@ -61,61 +79,109 @@ opts = Trollop::options do
61
79
  "one of [error, info, warn, debug]",
62
80
  :default => 'info'
63
81
  opt :install_deps, "install any missing dependencies"
82
+ opt :install_read_deps, "install missing dependencies for read metrics only"
83
+ opt :install_ref_deps, "install missing dependencies for reference metrics only"
64
84
  end
85
+
86
+ # Check dependencies if they are relevant to the command issued,
87
+ # and handle any commands to install missing ones
65
88
  gem_dir = Gem.loaded_specs['transrate'].full_gem_path
66
89
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
67
- if opts.install_deps
90
+ blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
91
+
92
+ if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
93
+ # user has requested dependency installation
68
94
  puts "Checking dependencies"
69
- Bindeps.require gem_deps
95
+
96
+ missing = []
97
+ if opts.install_deps || opts.install_read_deps
98
+ Bindeps.require gem_deps
99
+ missing += Bindeps.missing gem_deps
100
+ end
101
+
102
+ if opts.install_deps || opts.install_ref_deps
103
+ Bindeps.require blast_dep
104
+ missing += Bindeps.missing blast_dep
105
+ end
106
+
107
+ unless missing.empty?
108
+ raise TransrateError.new "Failed to install: \n - #{missing.join('\n - ')}"
109
+ end
110
+
70
111
  puts "All dependencies installed"
71
112
  exit
113
+
72
114
  else
73
- missing = Bindeps.missing gem_deps
74
- if missing.length > 0
115
+ # no dependency installation requested, but check dependencies
116
+ # for the commands provided are installed
117
+ missing = []
118
+ missing = Bindeps.missing gem_deps if opts.left
119
+ blast_missing = []
120
+ blast_missing = Bindeps.missing blast_dep if opts.reference
121
+
122
+ if missing.length + blast_missing.length > 0
75
123
  puts "Dependencies are missing:"
124
+
76
125
  missing.each do |dep|
77
126
  puts " - #{dep.name} (#{dep.version})"
78
127
  end
79
- puts "To install all missing dependencies, run `transrate --install-deps`"
80
- exit(1)
128
+
129
+ blast_missing.each do |dep|
130
+ puts " - #{dep.name} (#{dep.version})"
131
+ end
132
+
133
+ puts "To install all missing dependencies, run:"
134
+ puts " transrate --install-deps"
135
+ puts "If you only want the read-metrics dependencies:"
136
+ puts " transrate --install-read-deps"
137
+ puts "Or if you only want the reference-metrics dependencies: "
138
+ puts " transrate --install-ref-deps"
139
+
140
+ exit 1
81
141
  end
142
+
143
+ end
144
+
145
+ # Handle commands
146
+ unless %w[error info warn debug].include? opts.loglevel
147
+ raise "Loglevel #{opts.loglevel} is not valid. " +
148
+ "It must be one of: error, info, warn, debug."
82
149
  end
83
150
 
151
+ logger.level = Yell::Level.new opts.loglevel.to_sym
152
+
84
153
  if opts.assembly
85
154
  opts.assembly.split(',').each do |assembly_file|
86
155
  unless File.exist?(assembly_file)
87
- raise IOError.new "Assembly fasta file does not exist: #{assembly_file}"
156
+ raise TransrateIOError.new "Assembly fasta file does not exist: " +
157
+ " #{assembly_file}"
88
158
  end
89
159
  end
90
160
  else
91
- raise ArgumentError.new "Option --assembly must be specified. " +
92
- "Try --help for help."
161
+ raise TransrateArgError.new "Option --assembly must be specified. " +
162
+ "Try --help for help."
93
163
  end
94
164
 
95
165
  if opts.reference && !File.exist?(opts.reference)
96
- raise IOError.new "Reference fasta file does not exist: #{opts.reference}"
166
+ raise TransrateIOError.new "Reference fasta file does not exist: " +
167
+ " #{opts.reference}"
97
168
  end
98
169
 
99
170
  if opts.left and opts.right
100
171
  if opts.left.split(",").length != opts.right.split(",").length
101
172
  msg = "Please provide the same number of left reads as right reads"
102
- raise ArgumentError.new(msg)
173
+ raise TransrateArgError.new msg
103
174
  end
104
175
  opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
105
176
  if !File.exist?(left)
106
- raise IOError.new "Left read fastq file does not exist: #{left}"
177
+ raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
107
178
  end
108
179
  if !File.exist?(right)
109
- raise IOError.new "Right read fastq file does not exist: #{right}"
180
+ raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
110
181
  end
111
182
  end
112
183
  end
113
184
 
114
- if PROFILE
115
- logger.info "Starting profiler"
116
- RubyProf.start
117
- end
118
-
119
185
  def pretty_print_hash hash, width
120
186
  hash.map do |k, v|
121
187
  # show as float if there are any decimal places
@@ -133,15 +199,6 @@ def pretty_print_hash hash, width
133
199
  end
134
200
  end
135
201
 
136
- include Transrate
137
-
138
- unless %w[error info warn debug].include? opts.loglevel
139
- raise "Loglevel #{opts.loglevel} is not valid. " +
140
- "It must be one of: error, info, warn, debug."
141
- end
142
-
143
- logger.level = Yell::Level.new opts.loglevel.to_sym
144
-
145
202
  r = opts.reference ? Assembly.new(opts.reference) : nil
146
203
  report_width = 35
147
204
 
@@ -251,6 +308,7 @@ opts.assembly.split(',').each do |assembly|
251
308
  all << contig_results.merge(read_results)
252
309
  .merge(comparative_results)
253
310
  .merge({ :assembly => assembly })
311
+ .merge({ :score => score })
254
312
 
255
313
  end
256
314
 
@@ -266,10 +324,3 @@ CSV.open(outfile, 'wb') do |file|
266
324
  file << head.map { |x| row[x] }
267
325
  end
268
326
  end
269
-
270
- if PROFILE
271
- logger.info "Writing profiling results to transrate_profile.txt"
272
- result = RubyProf.stop
273
- printer = RubyProf::FlatPrinter.new(result)
274
- printer.print(File.open('transrate_profile.txt', 'w'))
275
- end
data/deps/blast.yaml ADDED
@@ -0,0 +1,27 @@
1
+ blastplus:
2
+ binaries:
3
+ - makeblastdb
4
+ - blastn
5
+ - tblastn
6
+ - blastp
7
+ - blastx
8
+ - tblastx
9
+ - makembindex
10
+ - psiblast
11
+ - rpsblast
12
+ - blastdbcmd
13
+ - segmasker
14
+ - dustmasker
15
+ - blast_formatter
16
+ - windowmasker
17
+ - blastdb_aliastool
18
+ - deltablast
19
+ - rpstblastn
20
+ - blastdbcheck
21
+ version:
22
+ number: '2.2.[0-9]'
23
+ command: 'blastx -version'
24
+ url:
25
+ 64bit:
26
+ macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
27
+ linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
data/deps/deps.yaml CHANGED
@@ -1,80 +1,54 @@
1
- blastplus:
2
- binaries:
3
- - makeblastdb
4
- - blastn
5
- - tblastn
6
- - blastp
7
- - blastx
8
- - tblastx
9
- - makembindex
10
- - psiblast
11
- - rpsblast
12
- - blastdbcmd
13
- - segmasker
14
- - dustmasker
15
- - blast_formatter
16
- - windowmasker
17
- - blastdb_aliastool
18
- - deltablast
19
- - rpstblastn
20
- - blastdbcheck
21
- version:
22
- number: '2.2.29'
23
- command: 'blastx -version'
24
- url:
25
- 64bit:
26
- macosx: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-universal-macosx.tar.gz
27
- linux: ftp://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/2.2.29/ncbi-blast-2.2.29+-x64-linux.tar.gz
28
1
  snap:
29
2
  binaries:
30
3
  - snap
31
4
  version:
32
- number: '1.0dev.63'
5
+ number: '1.0dev.67.trfix1'
33
6
  command: 'snap'
34
7
  url:
35
8
  64bit:
36
- linux: https://github.com/HibberdLab/snap/raw/dev/bin/linux/snap.tar.gz
37
- macosx: https://github.com/HibberdLab/snap/raw/dev/bin/macosx/snap.tar.gz
38
- samtools:
39
- binaries:
40
- - samtools
41
- - bcftools
42
- version:
43
- number: '0.1.19'
44
- command: 'samtools'
45
- url:
46
- 64bit:
47
- linux: https://github.com/cboursnell/samtools/raw/master/build/linux64.tar.gz
48
- macosx: https://github.com/cboursnell/samtools/raw/master/build/osx64.tar.gz
9
+ linux: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_linux.tar.gz
10
+ macosx: https://github.com/Blahah/snap/releases/download/v1.0dev.67.trfix1/snap_v1.0dev.67.trfix1_macosx.tar.gz
49
11
  bam-read:
50
12
  binaries:
51
13
  - bam-read
52
14
  version:
53
- number: '1.0.0.beta1'
15
+ number: '1.0.0.beta3'
54
16
  command: 'bam-read'
55
17
  url:
56
18
  64bit:
57
- linux: https://github.com/cboursnell/transrate-tools/raw/master/bin/linux/bam-read
58
- macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-read
59
- unpack: false
60
- bam-split:
61
- binaries:
62
- - bam-split
63
- version:
64
- number: '1.0.0.beta1'
65
- command: 'bam-split'
66
- url:
67
- 64bit:
68
- linux: https://github.com/Blahah/transrate-tools/raw/master/bin/linux/bam-split
69
- macosx: https://github.com/Blahah/transrate-tools/raw/master/bin/macosx/bam-split
70
- unpack: false
71
- express:
19
+ linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_linux.tar.gz
20
+ macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_macosx.tar.gz
21
+ unpack: true
22
+ salmon:
72
23
  binaries:
73
- - express
24
+ - salmon
25
+ libraries:
26
+ - libbz2.so.1
27
+ - libgcc_s.so.1
28
+ - libgomp.so.1
29
+ - liblzma.so.0
30
+ - libm.so.6
31
+ - libpthread.so.0
32
+ - librt.so.1
33
+ - libstdc++.so.6
34
+ - libtbbmalloc_proxy.so,
35
+ - libtbbmalloc_proxy.so.2
36
+ - libtbbmalloc.so
37
+ - libtbbmalloc.so.2
38
+ - libtbb.so
39
+ - libtbb.so.2
40
+ - libz.so.1
41
+ - libcmph.0.dylib
42
+ - libcmph.dylib
43
+ - libcmph.la
44
+ - libstaden-read.la
45
+ - libtbb.dylib
46
+ - libtbbmalloc.dylib
47
+ - libtbbmalloc_proxy.dylib
74
48
  version:
75
- number: '1.5.1'
76
- command: 'express --version'
49
+ number: 'Salmon v0.2.7'
50
+ command: 'salmon --help'
77
51
  url:
78
52
  64bit:
79
- linux: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-linux_x86_64.tgz
80
- macosx: http://bio.math.berkeley.edu/eXpress/downloads/express-1.5.1/express-1.5.1-macosx_x86_64.tgz
53
+ linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_Ubuntu-12.04.tar.gz
54
+ macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_MacOSX-10.10.1.tar.gz
@@ -55,6 +55,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
55
55
  }
56
56
  for (i=0; i < len; i++) {
57
57
  base = seq[i];
58
+ if (base > 90) {
59
+ base -= 32;
60
+ }
58
61
  switch (base) {
59
62
  case 'A': {
60
63
  idx=0;
@@ -81,6 +84,9 @@ VALUE method_composition(VALUE self, VALUE _seq) {
81
84
 
82
85
  if (i > 0) {
83
86
  prevbase = seq[i-1];
87
+ if (prevbase > 90) {
88
+ prevbase -= 32;
89
+ }
84
90
  switch (prevbase) {
85
91
  case 'A': {
86
92
  idx=idx;
@@ -138,6 +144,9 @@ VALUE method_kmer_count(VALUE self, VALUE _k, VALUE _s) {
138
144
  n = 0;
139
145
  for(i = start; i < start+k; i++) {
140
146
  base = c_str[i];
147
+ if (base > 90) {
148
+ base -= 32;
149
+ }
141
150
  switch (base) {
142
151
  case 'A': {
143
152
  h = h << 2;
@@ -4,6 +4,8 @@ require 'forwardable'
4
4
 
5
5
  module Transrate
6
6
 
7
+ class AssemblyError < TransrateError; end
8
+
7
9
  # Container for a transcriptome assembly and its associated
8
10
  # metadata.
9
11
  #
@@ -41,13 +43,20 @@ module Transrate
41
43
  def initialize file
42
44
  @file = File.expand_path file
43
45
  unless File.exist? @file
44
- raise IOError.new "Assembly file doesn't exist: #{@file}"
46
+ raise TransrateIOError.new "Assembly file doesn't exist: #{@file}"
45
47
  end
46
48
  @assembly = {}
47
49
  @n_bases = 0
48
50
  Bio::FastaFormat.open(file).each do |entry|
49
51
  @n_bases += entry.length
50
52
  contig = Contig.new(entry)
53
+ if @assembly.key?(contig.name)
54
+ logger.error "Non unique fasta identifier found"
55
+ logger.error ">#{contig.name}"
56
+ logger.error "Please make sure there are no duplicate entries in the assembly"
57
+ logger.error "Contig name is taken from before the first | or space"
58
+ raise AssemblyError
59
+ end
51
60
  @assembly[contig.name] = contig
52
61
  end
53
62
  @contig_metrics = ContigMetrics.new self
@@ -125,7 +134,6 @@ module Transrate
125
134
  # and iterate over them
126
135
  bin.sort_by! { |c| c.seq.length }
127
136
  bin.each do |contig|
128
-
129
137
  # increment our long contig counters if this
130
138
  # contig is above the thresholds
131
139
  if contig.length < 200
@@ -157,7 +165,6 @@ module Transrate
157
165
  cutoff = x2.pop / 100.0
158
166
  end
159
167
  end
160
-
161
168
  end
162
169
 
163
170
  # if there aren't enough sequences we might have no value for some
@@ -168,6 +175,11 @@ module Transrate
168
175
 
169
176
  # calculate and return the statistics as a hash
170
177
  mean = cumulative_length / @assembly.size
178
+ if @assembly.size * mean == 0
179
+ mean_orf_percent = 0
180
+ else
181
+ mean_orf_percent = 300 * orf_length_sum / (@assembly.size * mean)
182
+ end
171
183
  ns = Hash[x.map { |n| "n#{n}" }.zip(res)]
172
184
  {
173
185
  'n_seqs' => bin.size,
@@ -179,7 +191,7 @@ module Transrate
179
191
  'n_over_1k' => n_over_1k,
180
192
  'n_over_10k' => n_over_10k,
181
193
  'n_with_orf' => n_with_orf,
182
- 'mean_orf_percent' => 300 * orf_length_sum / (@assembly.size * mean)
194
+ 'mean_orf_percent' => mean_orf_percent
183
195
  }.merge ns
184
196
 
185
197
  end # basic_bin_stats
@@ -187,20 +199,18 @@ module Transrate
187
199
  def classify_contigs
188
200
  # create hash of file handles for each output
189
201
  base = File.basename @file
190
- files = Hash.new do
191
- %w(good fragmented chimeric bad).each do |type|
192
- handle = File.open("#{type}.#{base}", "wb")
193
- [type.to_sym, handle]
194
- end
202
+ files = {}
203
+ %w(good fragmented chimeric bad).each do |type|
204
+ files[type.to_sym] = File.open("#{type}.#{base}", "wb")
195
205
  end
196
206
  # loop through contigs writing them out to the appropriate file
197
207
  @assembly.each_pair do |name, contig|
198
208
  category = contig.classify
199
209
  handle = files[category]
200
- handle.push contig.to_fasta
210
+ handle.write contig.to_fasta
201
211
  end
202
212
  # close all the file handles
203
- files.each do |handle|
213
+ files.each do |type, handle|
204
214
  handle.close
205
215
  end
206
216
  end
@@ -55,12 +55,12 @@ module Transrate
55
55
  crbblast.reciprocals.each do |key, list|
56
56
  list.each_with_index do |hit, i|
57
57
  unless @reference.assembly.key? hit.target
58
- raise "#{hit.target} not in reference"
58
+ raise TransrateError.new "#{hit.target} not in reference"
59
59
  end
60
60
  @reference[hit.target].hits << hit
61
61
 
62
62
  unless @assembly.assembly.key? hit.query
63
- raise "#{hit.query} not in assembly"
63
+ raise TransrateError.new "#{hit.query} not in assembly"
64
64
  end
65
65
  contig = @assembly[hit.query]
66
66
  contig.has_crb = true
@@ -20,7 +20,12 @@ module Transrate
20
20
  attr_accessor :hits
21
21
 
22
22
  def initialize(seq, name: nil)
23
- seq.seq.gsub!("\0", "") # there is probably a better fix than this
23
+ # fix null bytes in the nucleotide sequence
24
+ seq.seq.gsub!("\0", "")
25
+ # trim trailing semicolons (because BLAST strips them)
26
+ if seq.respond_to?(:entry_id)
27
+ seq.entry_id.gsub!(/;$/, '')
28
+ end
24
29
  @seq = seq
25
30
  @seq.data = nil # no need to store raw fasta string
26
31
  @name = seq.respond_to?(:entry_id) ? seq.entry_id : name
@@ -248,13 +253,13 @@ module Transrate
248
253
  return :good if score >= 0.5
249
254
  # fragmented?
250
255
  if in_bridges > 5
251
- if p_not_segmented * p_bases_covered * p_seq_true * p_unique >= 0.5
256
+ if p_not_segmented * p_bases_covered * p_seq_true >= 0.5
252
257
  return :fragmented
253
258
  end
254
259
  end
255
260
  # chimeric?
256
261
  if p_not_segmented < 0.25
257
- if p_good * p_bases_covered * p_seq_true * p_unique >= 0.5
262
+ if p_good * p_bases_covered * p_seq_true >= 0.5
258
263
  return :chimeric
259
264
  end
260
265
  end