transrate 1.0.0.beta2 → 1.0.0.beta3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +17 -0
  3. data/README.md +3 -1
  4. data/Rakefile +59 -15
  5. data/bin/transrate +69 -29
  6. data/deps/deps.yaml +11 -19
  7. data/lib/transrate/assembly.rb +11 -4
  8. data/lib/transrate/contig.rb +9 -18
  9. data/lib/transrate/read_metrics.rb +7 -17
  10. data/lib/transrate/salmon.rb +2 -0
  11. data/lib/transrate/score_optimiser.rb +38 -3
  12. data/lib/transrate/transrater.rb +24 -11
  13. data/lib/transrate/version.rb +1 -1
  14. data/test/test_assembly.rb +2 -4
  15. data/test/test_contig.rb +1 -1
  16. data/test/test_optimiser.rb +27 -0
  17. data/test/test_read_metrics.rb +4 -5
  18. data/transrate.gemspec +2 -3
  19. metadata +6 -52
  20. data/.travis.yml +0 -17
  21. data/docs/transrate_logo_full.png +0 -0
  22. data/test/vagrant/centos_6.5_64/Vagrantfile +0 -122
  23. data/test/vagrant/debian_7.4_64/Vagrantfile +0 -126
  24. data/test/vagrant/debian_7.4_64/provision.sh +0 -28
  25. data/test/vagrant/fedora_20_64/Vagrantfile +0 -122
  26. data/test/vagrant/fedora_20_64/provision.sh +0 -16
  27. data/test/vagrant/fedora_20_64/sample_data/params.xprs +0 -182
  28. data/test/vagrant/fedora_20_64/sample_data/reads_1.fastq +0 -40000
  29. data/test/vagrant/fedora_20_64/sample_data/reads_1.fastq-reads_2.fastq-read_count.txt +0 -1
  30. data/test/vagrant/fedora_20_64/sample_data/reads_2.fastq +0 -40000
  31. data/test/vagrant/fedora_20_64/sample_data/transcripts.fasta +0 -498
  32. data/test/vagrant/fedora_20_64/sample_data/transcripts.fasta_results.xprs +0 -16
  33. data/test/vagrant/fedora_20_64/sample_data/transcripts/Genome +0 -17
  34. data/test/vagrant/fedora_20_64/sample_data/transcripts/GenomeIndex +0 -1
  35. data/test/vagrant/fedora_20_64/sample_data/transcripts/GenomeIndexHash +0 -0
  36. data/test/vagrant/fedora_20_64/sample_data/transcripts/OverflowTable +0 -0
  37. data/test/vagrant/ubuntu_12.04_64/Vagrantfile +0 -126
  38. data/test/vagrant/ubuntu_12.04_64/provision.sh +0 -24
  39. data/test/vagrant/ubuntu_12.04_64/sample_data/params.xprs +0 -182
  40. data/test/vagrant/ubuntu_12.04_64/sample_data/reads_1.fastq +0 -40000
  41. data/test/vagrant/ubuntu_12.04_64/sample_data/reads_1.fastq-reads_2.fastq-read_count.txt +0 -1
  42. data/test/vagrant/ubuntu_12.04_64/sample_data/reads_2.fastq +0 -40000
  43. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts.fasta +0 -498
  44. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts.fasta_results.xprs +0 -16
  45. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts/Genome +0 -17
  46. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts/GenomeIndex +0 -1
  47. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts/GenomeIndexHash +0 -0
  48. data/test/vagrant/ubuntu_12.04_64/sample_data/transcripts/OverflowTable +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: b90d296192b895be2cb4f5411ccc716264f5f360
4
- data.tar.gz: 54fa1d66571912a1526aa7bd0d9ed549a04c8b19
3
+ metadata.gz: a663790460cbe480bce88af21df3609e8b63a073
4
+ data.tar.gz: 22b4fb0af30dc78afeee914abd8b93d1e7dd7966
5
5
  SHA512:
6
- metadata.gz: dc43e7f297ce2d53a03f9a7fcdde87124c605750cb6947d35e32185f5de92f53785fab37302e3abde3e4a2f7736aee3680fb415ab413d914720c025ebbff5f87
7
- data.tar.gz: a122304dae2f6009c22e9d396c2b2f874cfa47937e582b1b5e5342cdf2a6d7eaeddcca465b3571022f7979d385f1c4014541a1ac1c5ca241eacaa8e8a4fd84e6
6
+ metadata.gz: ed8cad8ff40d18fd6ce7b1a78bf8a6dab2ef0b141d98a08c5403e8f766e4b11ab79f96e32f30abdae951c6fee3b5d31c3a2cc1d3c98a5a93c20ea258b6424183
7
+ data.tar.gz: a60476a414dc3626d80d5d1f8ed967c3dcb79563ce97454b79b5f5595451c4285582650591b5974ac0114980482fadd8f6e6d12c50f392a41cac04fe1562ce86
data/.gitignore CHANGED
@@ -43,6 +43,8 @@ dryrun
43
43
  *.coverage
44
44
 
45
45
  # c extension build artefacts
46
+ # note that we manually add these in a release,
47
+ # but don't want them updated in general
46
48
  Makefile
47
49
  transrate.bundle
48
50
  transrate.o
@@ -50,3 +52,18 @@ transrate.o
50
52
 
51
53
  # vagrant stuff
52
54
  .vagrant
55
+
56
+ # large test files
57
+ test/data/sorghum_100.1.fastq
58
+ test/data/sorghum_100.2.fastq
59
+
60
+ # packaging stuff
61
+ *.tar.gz
62
+ transrate-*
63
+ packaging/bindeps
64
+ packaging/vendor
65
+ packaging/packaging
66
+ traveling-ruby*
67
+ *box
68
+ libruby.so*
69
+ libruby*dylib
data/README.md CHANGED
@@ -36,7 +36,9 @@ Interested in helping? Great! We particularly would like help with the following
36
36
 
37
37
  - code review
38
38
  - documentation review
39
- - adding features
39
+ - adding features that are already discussed and approved on the issue tracker
40
40
  - tackling bugs
41
41
 
42
42
  For any of these, please just pick an appropriate issue [on the tracker](https://github.com/Blahah/transrate/issues) and make a pull request.
43
+
44
+ If you want to suggest, and maybe implement, a new feature, please suggest it on the tracker first. This allows us to give feedback on whether it makes sense given the scope of the software, and for the community to discuss requirements for the feature. *Don't* just implement the feature and make a pull request before discussing it, because you'll probably find your PR waiting a very long time for review.
data/Rakefile CHANGED
@@ -1,5 +1,6 @@
1
1
  require 'rake/testtask'
2
2
  require 'rake/extensiontask'
3
+ require 'bundler/setup'
3
4
 
4
5
  Rake::ExtensionTask.new('transrate') do |ext|
5
6
  ext.lib_dir = "lib/transrate"
@@ -75,6 +76,13 @@ Rake::TestTask.new do |t|
75
76
  t.test_files = ['test/test_salmon.rb']
76
77
  end
77
78
 
79
+ Rake::TestTask.new do |t|
80
+ t.name = :optimiser
81
+ t.libs << 'test'
82
+ t.test_files = ['test/test_optimiser.rb']
83
+ end
84
+
85
+
78
86
 
79
87
  desc "Run tests"
80
88
  task :default => :test
@@ -82,22 +90,20 @@ task :default => :test
82
90
  # PACKAGING
83
91
 
84
92
  PACKAGE_NAME = "transrate"
85
- VERSION = "1.0.0.beta2"
86
- TRAVELING_RUBY_VERSION = "20141215-2.1.5"
93
+ VERSION = "1.0.0.beta3"
94
+ TRAVELING_RUBY_VERSION = "20150210-2.2.0"
87
95
 
88
96
  desc "Package your app"
89
- task :package => ['package:linux:x86_64', 'package:osx']
97
+ task :package => ['package:linux', 'package:osx']
90
98
 
91
99
  namespace :package do
92
- namespace :linux do
93
- desc "Package your app for Linux x86_64"
94
- task :x86_64 => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz" do
95
- create_package("linux-x86_64")
96
- end
100
+ desc "Package your app for Linux x86_64"
101
+ task :linux => [:bundle_install, "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-linux-x86_64.tar.gz"] do
102
+ create_package("linux-x86_64")
97
103
  end
98
104
 
99
105
  desc "Package your app for OS X"
100
- task :osx => "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" do
106
+ task :osx => [:bundle_install, "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz"] do
101
107
  create_package("osx")
102
108
  end
103
109
  end
@@ -110,19 +116,57 @@ file "packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-osx.tar.gz" d
110
116
  download_runtime("osx")
111
117
  end
112
118
 
119
+ desc "Install gems to local directory"
120
+ task :bundle_install do
121
+ if RUBY_VERSION !~ /^2\.2\./
122
+ abort "You can only 'bundle install' using Ruby 2.2, because that's what Traveling Ruby uses."
123
+ end
124
+ Bundler.with_clean_env do
125
+ sh "env BUNDLE_IGNORE_CONFIG=1 bundle install --path packaging/vendor --without development"
126
+ end
127
+ sh "rm -f packaging/vendor/*/*/cache/*"
128
+ end
129
+
113
130
  def create_package(target)
114
- package_dir = "packaging/#{PACKAGE_NAME}-#{VERSION}-#{target}"
131
+ package_pref = "#{PACKAGE_NAME}-#{VERSION}-#{target}"
132
+ package_dir = "packaging/#{package_pref}"
115
133
  sh "rm -rf #{package_dir}"
116
134
  sh "mkdir -p #{package_dir}/lib/app"
117
- sh "cp -r lib #{package_dir}/lib/app/"
118
- sh "cp -r bin #{package_dir}/lib/app/"
119
- sh "mkdir #{package_dir}/lib/ruby"
120
- sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/ruby"
135
+ # copy transrate gem to destination
136
+ sh "cp -r lib bin deps ext files.txt #{package_dir}/lib/app/"
137
+ # install travelling ruby
138
+ sh "mkdir #{package_dir}/lib/app/ruby"
139
+ sh "tar -xzf packaging/packaging/traveling-ruby-#{TRAVELING_RUBY_VERSION}-#{target}.tar.gz -C #{package_dir}/lib/app/ruby"
140
+ # install loading script for transrate
121
141
  sh "cp packaging/transrate #{package_dir}/transrate"
142
+ # install bundled gem dependencies
143
+ sh "cp -pR packaging/vendor #{package_dir}/lib/"
144
+ sh "cd #{package_dir} && ../minify.sh"
145
+ sh "cp -r #{package_dir}/lib/vendor/* #{package_dir}/lib/app/"
146
+ sh "cp Gemfile Gemfile.lock transrate.gemspec #{package_dir}/lib/app/"
147
+ sh "mkdir #{package_dir}/lib/app/.bundle"
148
+ sh "cp packaging/bundler-config #{package_dir}/lib/app/.bundle/config"
149
+ # free up some more space in the package dir
150
+ sh "rm -rf #{package_dir}/lib/vendor"
151
+ sh "rm -rf #{package_dir}/lib/app/ruby/*/gems/*/test"
152
+ # install binary dependencies
153
+ sh "mkdir -p packaging/bindeps/#{target}"
154
+ sh "rm -rf packaging/bindeps/#{target}/*"
155
+ sh "cp test/vagrant/#{target}/*.tar.gz packaging/bindeps/#{target}"
156
+ sh "mkdir packaging/bindeps/#{target}/{bin,lib}"
157
+ sh "cd packaging/bindeps/#{target} && " +
158
+ "find . -maxdepth 1 -name '*.tar.gz' -exec tar -xzf '{}' \\; && " +
159
+ "mv snap bam-read bin/"
160
+ sh "cp -r packaging/bindeps/#{target}/{bin,lib} #{package_dir}/"
161
+ # install c extension
162
+ sh "cp test/vagrant/#{target}/{transrate,libruby}.* #{package_dir}/lib/"
163
+ # create package
122
164
  if !ENV['DIR_ONLY']
123
- sh "tar -czf #{package_dir}.tar.gz #{package_dir}"
165
+ sh "cd packaging && tar -czf #{package_pref}.tar.gz #{package_pref}"
124
166
  sh "rm -rf #{package_dir}"
125
167
  end
168
+ # cleanup
169
+ sh "rm -rf packaging/vendor packaging/bindeps .bundle"
126
170
  end
127
171
 
128
172
  def download_runtime(target)
data/bin/transrate CHANGED
@@ -62,25 +62,28 @@ opts = Trollop::options do
62
62
  OPTIONS:
63
63
 
64
64
  EOS
65
- opt :assembly, "assembly file(s) in FASTA format, comma-separated",
65
+ opt :assembly, "Assembly file(s) in FASTA format, comma-separated",
66
66
  :type => String
67
- opt :reference, "reference proteome file in FASTA format",
67
+ opt :reference, "Reference proteome file in FASTA format",
68
68
  :type => String
69
- opt :left, "left reads file in FASTQ format",
69
+ opt :left, "Left reads file in FASTQ format",
70
70
  :type => String
71
- opt :right, "right reads file in FASTQ format",
71
+ opt :right, "Right reads file in FASTQ format",
72
72
  :type => String
73
- opt :threads, "number of threads to use",
73
+ opt :threads, "Number of threads to use",
74
74
  :default => 8,
75
75
  :type => Integer
76
- opt :outfile, "prefix filename to use for CSV output",
76
+ opt :merge_assemblies, "Merge multiple assemblies into file",
77
+ :type => String
78
+ opt :outfile, "Prefix filename to use for CSV output",
77
79
  :default => 'transrate'
78
- opt :loglevel, "the amount of information to print. " +
79
- "one of [error, info, warn, debug]",
80
+ opt :loglevel, "The amount of information to print. " +
81
+ "One of [error, info, warn, debug]",
80
82
  :default => 'info'
81
- opt :install_deps, "install any missing dependencies"
82
- opt :install_read_deps, "install missing dependencies for read metrics only"
83
- opt :install_ref_deps, "install missing dependencies for reference metrics only"
83
+ opt :install_deps, "Install any missing dependencies. One of [all, read, ref]",
84
+ :type => String, :default => nil
85
+ # opt :install_read_deps, "install missing dependencies for read metrics only"
86
+ # opt :install_ref_deps, "install missing dependencies for reference metrics only"
84
87
  end
85
88
 
86
89
  # Check dependencies if they are relevant to the command issued,
@@ -89,7 +92,20 @@ gem_dir = Gem.loaded_specs['transrate'].full_gem_path
89
92
  gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
90
93
  blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
91
94
 
92
- if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
95
+ deps, read_deps, ref_deps = nil
96
+ unless opts.install_deps.nil?
97
+
98
+ unless %w[all read red].include? opts.install_deps
99
+ raise TransrateError.new "install-deps #{opts.install_deps} is not valid. " +
100
+ "You must specify one of: all, read, ref."
101
+ end
102
+
103
+ deps = opts.install_deps == 'all'
104
+ read_deps = opts.install_deps == 'read'
105
+ ref_deps = opts.install_deps == 'ref'
106
+ end
107
+
108
+ if deps || read_deps || ref_deps
93
109
  # user has requested dependency installation
94
110
  puts "Checking dependencies"
95
111
 
@@ -105,7 +121,9 @@ if opts.install_deps || opts.install_read_deps || opts.install_ref_deps
105
121
  end
106
122
 
107
123
  unless missing.empty?
108
- raise TransrateError.new "Failed to install: \n - #{missing.join('\n - ')}"
124
+ list = missing.collect {|i| "#{i.name}:#{i.version}"}.join("\n - ")
125
+ msg = "Failed to install: \n - #{list}"
126
+ raise TransrateError.new msg
109
127
  end
110
128
 
111
129
  puts "All dependencies installed"
@@ -144,7 +162,7 @@ end
144
162
 
145
163
  # Handle commands
146
164
  unless %w[error info warn debug].include? opts.loglevel
147
- raise "Loglevel #{opts.loglevel} is not valid. " +
165
+ raise TransrateError.new "Loglevel #{opts.loglevel} is not valid. " +
148
166
  "It must be one of: error, info, warn, debug."
149
167
  end
150
168
 
@@ -182,14 +200,14 @@ if opts.left and opts.right
182
200
  end
183
201
  end
184
202
 
185
- def pretty_print_hash hash, width
203
+ def pretty_print_hash hash, width, round=2
186
204
  hash.map do |k, v|
187
205
  # show as float if there are any decimal places
188
- if v.to_f.round(2).to_s.split('.').last.to_i > 0
189
- v = v.to_f.round(2)
206
+ if v.to_f.round(round).to_s.split('.').last.to_i > 0
207
+ v = v.to_f.round(round)
190
208
  end
191
209
  if v.is_a? Float
192
- v = v.round(2)
210
+ v = v.round(round)
193
211
  end
194
212
  pad = (width - (k.to_s.length + v.to_s.length))
195
213
  pad = [pad, 0].max
@@ -204,18 +222,35 @@ report_width = 35
204
222
 
205
223
  # loop through the assemblies, storing their outputs in an array of hashes
206
224
  all = []
207
- opts.assembly.split(',').each do |assembly|
225
+
226
+ assemblies=opts.assembly
227
+ if opts.merge_assemblies
228
+ merged_file = opts.merge_assemblies
229
+ merged = {}
230
+ assemblies.split(",").each do |file|
231
+ Bio::FastaFormat.open(file).each do |entry|
232
+ contig_name = "#{File.basename(file,File.extname(file))}:"
233
+ contig_name << "#{entry.entry_id}"
234
+ merged[contig_name] = entry.seq
235
+ end
236
+ end
237
+ logger.info "Merging assemblies into one file...'#{merged_file}'"
238
+ File.open(merged_file, "wb") do |out|
239
+ merged.each do |name, seq|
240
+ out.write ">#{name}\n"
241
+ out.write "#{seq}\n"
242
+ end
243
+ end
244
+
245
+ assemblies = merged_file
246
+ end
247
+
248
+ assemblies.split(',').each do |assembly|
208
249
 
209
250
  logger.info "Loading assembly: #{assembly}"
210
251
 
211
252
  a = Assembly.new assembly
212
- transrater = Transrater.new(a, r,
213
- left: opts.left,
214
- right: opts.right,
215
- insertsize: opts.insertsize,
216
- insertsd: opts.insertsd,
217
- threads: opts.threads)
218
-
253
+ transrater = Transrater.new(a, r, threads: opts.threads)
219
254
 
220
255
  logger.info "Analysing assembly: #{assembly}"
221
256
 
@@ -238,8 +273,7 @@ opts.assembly.split(',').each do |assembly|
238
273
  if (opts.left && opts.right)
239
274
  logger.info "Calculating read diagnostics..."
240
275
  t0 = Time.now
241
- read_results = transrater.read_metrics(opts.left,
242
- opts.right).read_stats
276
+ read_results = transrater.read_metrics(opts.left, opts.right).read_stats
243
277
 
244
278
  if read_results
245
279
  logger.info "Read mapping metrics:"
@@ -275,9 +309,13 @@ opts.assembly.split(',').each do |assembly|
275
309
 
276
310
  if (opts.left && opts.right)
277
311
  score = transrater.assembly_score
312
+ optimal, cutoff = transrater.assembly_optimal_score
278
313
  unless score.nil?
279
- logger.info "TRANSRATE ASSEMBLY SCORE: #{score.round(4)}"
314
+ pretty_print_hash({:TRANSRATE_ASSEMBLY_SCORE => score}, report_width, 4)
280
315
  logger.info "-" * report_width
316
+ pretty_print_hash({:TRANSRATE_OPTIMAL_SCORE => optimal}, report_width, 4)
317
+ pretty_print_hash({:TRANSRATE_OPTIMAL_CUTOFF => cutoff}, report_width, 4)
318
+ pretty_print_hash(transrater.good_contigs, report_width)
281
319
  end
282
320
  end
283
321
 
@@ -309,6 +347,8 @@ opts.assembly.split(',').each do |assembly|
309
347
  .merge(comparative_results)
310
348
  .merge({ :assembly => assembly })
311
349
  .merge({ :score => score })
350
+ .merge({ :optimal_score => optimal })
351
+ .merge({ :cutoff => cutoff })
312
352
 
313
353
  end
314
354
 
data/deps/deps.yaml CHANGED
@@ -12,43 +12,35 @@ bam-read:
12
12
  binaries:
13
13
  - bam-read
14
14
  version:
15
- number: '1.0.0.beta3'
15
+ number: '1.0.0.beta4'
16
16
  command: 'bam-read'
17
17
  url:
18
18
  64bit:
19
- linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_linux.tar.gz
20
- macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta3/bam-read_1.0.0.beta3_macosx.tar.gz
19
+ linux: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta4/bam-read_v1.0.0.beta4_linux.tar.gz
20
+ macosx: https://github.com/Blahah/transrate-tools/releases/download/v1.0.0.beta4/bam-read_v1.0.0.beta4_osx.tar.gz
21
21
  unpack: true
22
22
  salmon:
23
23
  binaries:
24
24
  - salmon
25
25
  libraries:
26
- - libbz2.so.1
27
- - libgcc_s.so.1
28
26
  - libgomp.so.1
29
- - liblzma.so.0
30
27
  - libm.so.6
31
- - libpthread.so.0
32
28
  - librt.so.1
33
- - libstdc++.so.6
34
- - libtbbmalloc_proxy.so,
35
- - libtbbmalloc_proxy.so.2
36
- - libtbbmalloc.so
37
- - libtbbmalloc.so.2
38
29
  - libtbb.so
39
30
  - libtbb.so.2
40
- - libz.so.1
31
+ - libtbbmalloc.so
32
+ - libtbbmalloc.so.2
33
+ - libtbbmalloc_proxy.so
34
+ - libtbbmalloc_proxy.so.2
41
35
  - libcmph.0.dylib
42
36
  - libcmph.dylib
43
- - libcmph.la
44
- - libstaden-read.la
45
37
  - libtbb.dylib
46
38
  - libtbbmalloc.dylib
47
39
  - libtbbmalloc_proxy.dylib
48
40
  version:
49
- number: 'Salmon v0.2.7'
50
- command: 'salmon --help'
41
+ number: '0.3'
42
+ command: 'salmon -v'
51
43
  url:
52
44
  64bit:
53
- linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_Ubuntu-12.04.tar.gz
54
- macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.2.7/Salmon-v0.2.7_MacOSX-10.10.1.tar.gz
45
+ linux: https://github.com/kingsfordgroup/sailfish/releases/download/v0.3.0/SalmonBeta-v0.3.0_squeeze.tar.gz
46
+ macosx: https://github.com/kingsfordgroup/sailfish/releases/download/v0.3.0/SalmonBeta-v0.3.0_MacOSX-10.10.2.tar.gz
data/lib/transrate/assembly.rb CHANGED
@@ -196,17 +196,16 @@ module Transrate
196
196
 
197
197
  end # basic_bin_stats
198
198
 
199
- def classify_contigs
199
+ def classify_contigs cutoff
200
200
  # create hash of file handles for each output
201
201
  base = File.basename @file
202
202
  files = {}
203
- %w(good fragmented chimeric bad).each do |type|
203
+ %w(good bad).each do |type|
204
204
  files[type.to_sym] = File.open("#{type}.#{base}", "wb")
205
205
  end
206
206
  # loop through contigs writing them out to the appropriate file
207
207
  @assembly.each_pair do |name, contig|
208
- category = contig.classify
209
- handle = files[category]
208
+ handle = files[contig.classify(cutoff)]
210
209
  handle.write contig.to_fasta
211
210
  end
212
211
  # close all the file handles
@@ -215,6 +214,14 @@ module Transrate
215
214
  end
216
215
  end
217
216
 
217
+ def good_contigs
218
+ good = 0
219
+ @assembly.each do |name, contig|
220
+ good += 1 if contig.classification == :good
221
+ end
222
+ good
223
+ end
224
+
218
225
  end # Assembly
219
226
 
220
227
  end # Transrate
data/lib/transrate/contig.rb CHANGED
@@ -12,9 +12,9 @@ module Transrate
12
12
  # read-based metrics
13
13
  attr_accessor :eff_length, :eff_count, :tpm
14
14
  attr_accessor :coverage, :uncovered_bases, :p_uncovered_bases
15
- attr_accessor :p_seq_true, :p_unique
15
+ attr_accessor :p_seq_true
16
16
  attr_accessor :low_uniqueness_bases, :in_bridges
17
- attr_accessor :p_good, :p_not_segmented, :good
17
+ attr_accessor :p_good, :p_not_segmented, :good, :classification
18
18
  # reference-based metrics
19
19
  attr_accessor :has_crb, :reference_coverage
20
20
  attr_accessor :hits
@@ -37,11 +37,11 @@ module Transrate
37
37
  @p_seq_true = 0
38
38
  @uncovered_bases = length
39
39
  @p_uncovered_bases = 1
40
- @p_unique = 0
41
40
  @p_not_segmented = 1
42
41
  @score = -1
43
42
  @good = 0
44
43
  @coverage = 0
44
+ @classification = :unknown
45
45
  end
46
46
 
47
47
  def each &block
@@ -69,7 +69,6 @@ module Transrate
69
69
  :p_bases_covered => p_bases_covered,
70
70
  :p_seq_true => p_seq_true,
71
71
  :score => score,
72
- :p_unique => p_unique,
73
72
  :p_not_segmented => p_not_segmented,
74
73
  :eff_length => eff_length,
75
74
  :eff_count => eff_count,
@@ -249,21 +248,13 @@ module Transrate
249
248
  # - fragmented (in_bridges > 0) and no other problems
250
249
  # - chimeric (p_not_segmented < 0.25) and no other problems
251
250
  # - bad (score < 0.5 and not in any other category)
252
- def classify
253
- return :good if score >= 0.5
254
- # fragmented?
255
- if in_bridges > 5
256
- if p_not_segmented * p_bases_covered * p_seq_true >= 0.5
257
- return :fragmented
258
- end
259
- end
260
- # chimeric?
261
- if p_not_segmented < 0.25
262
- if p_good * p_bases_covered * p_seq_true >= 0.5
263
- return :chimeric
264
- end
251
+ def classify cutoff
252
+ if score >= cutoff
253
+ @classification = :good
254
+ else
255
+ @classification = :bad
265
256
  end
266
- return :bad
257
+ return @classification
267
258
  end
268
259
 
269
260
  def to_fasta