transrate 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +17 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -1
  5. data/bin/transrate +2 -375
  6. data/docs/transrate_logo_full.png +0 -0
  7. data/files.txt +78 -0
  8. data/lib/transrate.rb +1 -0
  9. data/lib/transrate/assembly.rb +4 -0
  10. data/lib/transrate/cmdline.rb +543 -0
  11. data/lib/transrate/comparative_metrics.rb +3 -0
  12. data/lib/transrate/snap.rb +10 -0
  13. data/lib/transrate/version.rb +1 -1
  14. data/packaging/build_deps_linux.sh +57 -0
  15. data/packaging/build_deps_macosx.sh +55 -0
  16. data/packaging/bundler-config +3 -0
  17. data/packaging/minify.sh +43 -0
  18. data/packaging/transrate +20 -0
  19. data/test/data/test_contig_nc1.fa +2 -0
  20. data/test/data/test_contig_nc2.fa +4 -0
  21. data/test/data/test_contig_nc3.fa +6 -0
  22. data/test/data/test_contig_nc4.fa +4 -0
  23. data/test/data/test_contig_nc5.fa +6 -0
  24. data/test/data/test_contig_nc6.fa +2 -0
  25. data/test/data/test_contig_nc7.fa +2 -0
  26. data/test/data/test_reference_aa1.fa +2 -0
  27. data/test/data/test_reference_nc1.fa +2 -0
  28. data/test/helper.rb +35 -6
  29. data/test/test_assembly.rb +5 -3
  30. data/test/test_cmd.rb +1 -1
  31. data/test/test_cmdline.rb +114 -0
  32. data/test/test_comp_metrics.rb +2 -2
  33. data/test/test_contig.rb +1 -1
  34. data/test/test_contig_metrics.rb +1 -1
  35. data/test/test_inline.rb +1 -1
  36. data/test/test_optimiser.rb +16 -16
  37. data/test/test_read_metrics.rb +1 -1
  38. data/test/test_salmon.rb +1 -1
  39. data/test/test_snap.rb +1 -1
  40. data/test/test_transrate.rb +1 -1
  41. data/test/test_transrater.rb +2 -2
  42. data/test/vagrant/centos_6.5_64/Vagrantfile +122 -0
  43. data/test/vagrant/debian_7.4_64/Vagrantfile +126 -0
  44. data/test/vagrant/debian_7.4_64/provision.sh +28 -0
  45. data/test/vagrant/fedora_20_64/Vagrantfile +122 -0
  46. data/test/vagrant/fedora_20_64/provision.sh +16 -0
  47. data/test/vagrant/linux-x86_64/Vagrantfile +10 -0
  48. data/test/vagrant/osx/Vagrantfile +18 -0
  49. data/test/vagrant/ubuntu_12.04_64/Vagrantfile +126 -0
  50. data/test/vagrant/ubuntu_12.04_64/provision.sh +24 -0
  51. data/transrate.gemspec +2 -2
  52. metadata +38 -17
  53. data/test/test_bin.rb +0 -139
data/lib/transrate.rb CHANGED
@@ -38,3 +38,4 @@ require 'transrate/read_metrics'
38
38
  require 'transrate/comparative_metrics'
39
39
  require 'transrate/contig_metrics'
40
40
  require 'transrate/cmd'
41
+ require 'transrate/cmdline'
@@ -59,6 +59,10 @@ module Transrate
59
59
  logger.error ">#{contig.name}"
60
60
  logger.error "Please make sure there are no duplicate entries in the assembly"
61
61
  logger.error "Contig name is taken from before the first | or space"
62
+ logger.error "If you used Trinity, there is a known bug that breaks" +
63
+ "contig names to make them non-unique."
64
+ logger.error "You can fix your Trinity assembly by replacing | with _"
65
+ logger.error "e.g. `sed 's/\\|/_/' Trinity.fa > Trinity.fixed.fa`"
62
66
  raise AssemblyError
63
67
  end
64
68
  @assembly[contig.name] = contig
@@ -0,0 +1,543 @@
1
+ module Transrate
2
+
3
+ class Cmdline
4
+
5
+ require 'trollop'
6
+ require 'csv'
7
+ require 'bindeps'
8
+ require 'colorize'
9
+ require 'pathname'
10
+
11
# Builds the command-line front end from the raw argument list.
#
# Parses +args+ into @opts, prints the usage examples when --examples was
# given (print_examples terminates the process), fixes the width used for
# the metric reports, and finally validates the parsed options.
def initialize(args)
  @opts = parse_arguments(args)
  print_examples if @opts.examples
  @report_width = 35
  check_arguments
end
19
+
20
# Analyses every assembly named by --assembly and writes the summary CSV.
#
# The output directory is created if needed and all work happens inside
# it. When --merge-assemblies is set, the input assemblies are first
# concatenated into a single file and only that merged file is analysed.
#
# Returns whatever write_assembly_csv returns.
def run
  results = []

  assemblies = @opts.assembly.split(',')
  result_paths = assembly_result_paths assemblies

  r = @opts.reference ? Assembly.new(File.expand_path(@opts.reference)) : nil

  # Stored through []= like the other option rewrites in this class, and
  # kept in a local so the expanded path is what gets created and entered.
  output = File.expand_path @opts.output
  @opts[:output] = output
  FileUtils.mkdir_p output

  Dir.chdir output do
    if @opts.merge_assemblies
      # concatenate_assemblies returns the merged file's path as a single
      # String; wrap it so it can be zipped below, and recompute the result
      # directory so it matches the merged file instead of the inputs.
      assemblies = Array(concatenate_assemblies(assemblies))
      result_paths = assembly_result_paths assemblies
    end

    assemblies.zip(result_paths) do |assembly, result_path|
      results << analyse_assembly(assembly, r, result_path)
    end

    write_assembly_csv results
  end
end
44
+
45
# Parses the raw command-line arguments into the options object.
#
# With no arguments, or with -h / --help, the banner is printed and
# Trollop::HelpNeeded is raised so that Trollop's standard exception
# handling can show the help text.
#
# The parser is built once and shared: the same instance is handed to
# Trollop's exception handling and used for parsing, rather than
# constructing two separate parsers.
def parse_arguments(args)
  parser = argument_parser
  Trollop::with_standard_exception_handling parser do
    if args.empty? || args.include?("-h") || args.include?("--help")
      transrate_banner
      raise Trollop::HelpNeeded
    end

    parser.parse args
  end
end
55
+
56
# Builds and returns a new Trollop::Parser describing every command-line
# option transrate accepts, with the version string and help banner set.
# A fresh parser is constructed on every call.
+ def argument_parser
57
# Keep a handle on this Cmdline so the block below can call help_message
# and allowed_deps on it (presumably Trollop evaluates the block in the
# parser's own context -- confirm against the Trollop documentation).
+ cmdline = self
58
+ Trollop::Parser.new do
59
+ version Transrate::VERSION::STRING.dup
60
+ banner cmdline.help_message
61
+ opt :assembly, "Assembly file(s) in FASTA format, comma-separated",
62
+ :type => String
63
+ opt :left, "Left reads file(s) in FASTQ format, comma-separated",
64
+ :type => String
65
+ opt :right, "Right reads file(s) in FASTQ format, comma-separated",
66
+ :type => String
67
+ opt :reference,
68
+ "Reference proteome or transcriptome file in FASTA format",
69
+ :type => String
70
+ opt :threads, "Number of threads to use",
71
+ :default => 8,
72
+ :type => Integer
73
+ opt :merge_assemblies,
74
+ "Merge best contigs from multiple assemblies into file",
75
+ :type => String
76
+ opt :output, "Directory where results are output (will be created)",
77
+ :default => 'transrate_results'
78
+ opt :loglevel,
79
+ "Log level. One of [error, info, warn, debug]",
80
+ :default => 'info'
81
+ opt :install_deps,
82
+ "Install any missing dependencies. One of " +
83
+ "[#{cmdline.allowed_deps.join(', ')}]",
84
+ :type => String, :default => nil
85
+ opt :examples, "Show some example commands with explanations"
86
+ end
87
+ end
88
+
89
# Returns the width of the controlling terminal in columns.
#
# IO.console is nil when the process has no controlling terminal (cron
# jobs, CI runners, daemons), and querying the window size can fail with
# a system error; in both cases a conventional width of 80 columns is
# returned instead of raising.
def terminal_columns
  require 'io/console' # loaded lazily: only needed when the banner is shown
  console = IO.console
  return 80 unless console
  console.winsize.last
rescue SystemCallError
  80
end
93
+
94
# Returns the text shown at the top of the help output: the transrate
# version (interpolated from Transrate::VERSION::STRING), the authors, a
# short description of the three kinds of metrics, the documentation URL
# and the usage line. The text ends with an "OPTIONS:" heading; the option
# list itself is not part of this string.
+ def help_message
95
+ <<-EOS
96
+
97
+ Transrate v#{Transrate::VERSION::STRING.dup}
98
+ by Richard Smith-Unna, Chris Boursnell, Rob Patro,
99
+ Julian Hibberd, and Steve Kelly
100
+
101
+ DESCRIPTION:
102
+ Analyse a de-novo transcriptome assembly using three kinds of metrics:
103
+
104
+ 1. sequence based (if --assembly is given)
105
+ 2. read mapping based (if --left and --right are given)
106
+ 3. reference based (if --reference is given)
107
+
108
+ Documentation at http://hibberdlab.com/transrate
109
+
110
+ USAGE:
111
+ transrate <options>
112
+
113
+ OPTIONS:
114
+
115
+ EOS
116
+ end
117
+
118
# Prints the ASCII-art transrate logo to standard output, but only when
# terminal_columns reports more than 70 columns. The decoration on either
# side of the logo is coloured green, yellow and red on successive rows
# (the colour methods presumably come from the colorize gem required by
# this class -- confirm). Always returns an empty string.
+ def transrate_banner
119
+ if terminal_columns > 70
120
+ txp = '░▓▓▓^▓▓▓░'
121
+ toptxp = txp.green
122
+ midtxp = txp.yellow
123
+ bottxp = txp.red
124
+ puts <<-EOS
125
+ _ _
126
+ | |_ _ __ __ _ _ __ ___ _ __ __ _ | |_ ___
127
+ #{toptxp} | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{toptxp}
128
+ #{midtxp} | |_ | | | (_| || | | |\\__ \\| | | (_| || |_| __/ #{midtxp}
129
+ #{bottxp} \\__||_| \\__,_||_| |_||___/|_| \\__,_| \\__|\\___| #{bottxp}
130
+ EOS
131
+ end
132
+ ""
133
+ end
134
+
135
# Prints the version line followed by a set of annotated example commands,
# every line flush with the left margin, then terminates the process with
# exit status 0.
def print_examples
  examples = [
    "",
    "Transrate v#{Transrate::VERSION::STRING.dup}",
    "",
    "EXAMPLE COMMANDS:",
    "",
    "# check dependencies and install any that are missing",
    "transrate --install-deps all",
    "",
    "# get the transrate score for the assembly and each contig",
    "transrate --assembly contigs.fa --left left.fq --right right.fq",
    "",
    "# basic assembly metrics only",
    "transrate --assembly contigs.fa",
    "",
    "# basic and reference-based metrics with 8 threads",
    "transrate --assembly contigs.fa --reference ref.fa --threads 8",
    "",
    "# contig and read-based metrics for two assemblies with 32 threads",
    "transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32"
  ]
  puts examples.map { |line| line.lstrip }.join("\n")
  exit(0)
end
161
+
162
# Runs every option validation step in a fixed order: dependencies first,
# then log level, assembly, reference and reads. Returns the value of the
# last step.
def check_arguments
  steps = [
    :check_dependencies,
    :check_loglevel,
    :check_assembly,
    :check_reference,
    :check_reads
  ]
  steps.map { |step| send(step) }.last
end
169
+
170
# Validates --loglevel and applies it to the logger.
#
# Raises TransrateError unless the level is one of error, info, warn or
# debug; otherwise sets the logger's level from the chosen name.
def check_loglevel
  level = @opts.loglevel
  permitted = %w[error info warn debug]

  if !permitted.include?(level)
    message = "Loglevel #{level} is not valid. " +
              "It must be one of: error, info, warn, debug."
    raise TransrateError.new(message)
  end

  logger.level = Yell::Level.new(level.to_sym)
end
178
+
179
# Validates --assembly and rewrites it as a comma-separated list of
# absolute paths.
#
# Raises TransrateArgError when the option is missing or names no files
# (nil or an empty value), and TransrateIOError naming the first listed
# file that does not exist.
def check_assembly
  files = @opts.assembly.to_s.split(',')
  if files.empty?
    raise TransrateArgError.new "Option --assembly must be specified. " +
                                "Try --help for help."
  end

  files = files.map { |a| File.expand_path a }
  @opts[:assembly] = files.join(',')

  missing = files.find { |assembly_file| !File.exist?(assembly_file) }
  if missing
    # single space after the colon (the message used to contain two)
    raise TransrateIOError.new "Assembly fasta file does not exist: #{missing}"
  end
end
195
+
196
# Validates --reference when it was given: rewrites the option as an
# absolute path and raises TransrateIOError if that file does not exist.
# Does nothing when no reference was supplied.
def check_reference
  return unless @opts.reference

  reference = File.expand_path @opts.reference
  @opts[:reference] = reference
  unless File.exist?(reference)
    # single space after the colon (the message used to contain two)
    raise TransrateIOError.new "Reference fasta file does not exist: #{reference}"
  end
end
205
+
206
# Validates the read files when both --left and --right were given.
#
# The two comma-separated lists must have the same number of entries
# (TransrateArgError otherwise). Both options are rewritten as lists of
# absolute paths, and every file must exist: the pairs are checked in
# order, left before right, and TransrateIOError names the first file
# that is missing. Does nothing unless both options are present.
def check_reads
  return unless @opts.left && @opts.right

  lefts = @opts.left.split(',')
  rights = @opts.right.split(',')
  unless lefts.length == rights.length
    raise TransrateArgError.new("Please provide the same number of left reads as right reads")
  end

  lefts = lefts.map { |file| File.expand_path(file) }
  rights = rights.map { |file| File.expand_path(file) }
  @opts[:left] = lefts.join(',')
  @opts[:right] = rights.join(',')

  lefts.zip(rights).each do |left, right|
    unless File.exist?(left)
      raise TransrateIOError.new("Left read fastq file does not exist: #{left}")
    end
    unless File.exist?(right)
      raise TransrateIOError.new("Right read fastq file does not exist: #{right}")
    end
  end
end
228
+
229
# Checks the external dependencies relevant to the command issued and
# handles requests to install missing ones.
#
# The dependency descriptions are deps/deps.yaml and deps/blast.yaml inside
# the loaded transrate gem. When --install-deps was given it is validated
# first and installation is attempted for the requested group; otherwise
# the read dependencies are checked only if --left was given and the
# blast dependency only if --reference was given, and anything missing is
# reported.
def check_dependencies
  deps_dir = File.join(Gem.loaded_specs['transrate'].full_gem_path, 'deps')
  gem_deps = File.join(deps_dir, 'deps.yaml')
  blast_dep = File.join(deps_dir, 'blast.yaml')

  requested = @opts.install_deps
  check_install_command unless requested.nil?

  deps = requested == 'all'
  read_deps = requested == 'read'
  ref_deps = requested == 'ref'

  if deps || read_deps || ref_deps
    # user has requested dependency installation
    puts "Checking dependencies"
    install_missing_dependencies(deps, read_deps, ref_deps,
                                 gem_deps, blast_dep)
  else
    # nothing to install: just make sure what this run needs is present
    missing = @opts.left ? Bindeps.missing(gem_deps) : []
    blast_missing = @opts.reference ? Bindeps.missing(blast_dep) : []
    print_missing_dependencies(missing, blast_missing)
  end
end # check_dependencies
261
+
262
# Lists the values accepted by --install-deps.
#
# Only 'read' is offered when the environment variable
# TRANSRATE_PACKAGED_BINARY is set to exactly 'true'; otherwise 'read',
# 'ref' and 'all' are allowed.
def allowed_deps
  packaged = ENV['TRANSRATE_PACKAGED_BINARY'] == 'true'
  packaged ? ['read'] : ['read', 'ref', 'all']
end
270
+
271
# Ensures the value given to --install-deps is one of allowed_deps.
# Raises TransrateError listing the accepted values otherwise.
def check_install_command
  return if allowed_deps.include?(@opts.install_deps)

  raise TransrateError.new(
    "install-deps #{@opts.install_deps} is not valid. " +
    "You must specify one of: #{allowed_deps.join(', ')}."
  )
end
278
+
279
# Installs the requested dependency groups through Bindeps and then exits.
#
# deps      - true when every group was requested
# read_deps - true when the read-metrics group (gem_deps) was requested
# ref_deps  - true when the reference-metrics group (blast_dep) was requested
# gem_deps  - path of the read-metrics dependency description
# blast_dep - path of the blast dependency description
#
# Each selected description is first passed to Bindeps.require and then
# re-checked with Bindeps.missing. Raises TransrateError listing anything
# still missing; otherwise prints a confirmation and exits the process.
def install_missing_dependencies(deps, read_deps, ref_deps,
                                 gem_deps, blast_dep)
  selected = []
  selected << gem_deps if deps || read_deps
  selected << blast_dep if deps || ref_deps

  missing = selected.flat_map do |description|
    Bindeps.require description
    Bindeps.missing description
  end

  if !missing.empty?
    list = missing.map { |dep| "#{dep.name}:#{dep.version}" }.join("\n - ")
    raise TransrateError.new("Failed to install: \n - #{list}")
  end

  puts "All dependencies installed"
  exit
end # install_missing_dependencies
301
+
302
# Reports missing dependencies and exits with status 1.
#
# missing       - missing read-metrics dependencies
# blast_missing - missing reference-metrics dependencies
#
# Each entry is printed with its name and version, followed by the
# commands that install them. Returns without printing anything when both
# lists are empty.
def print_missing_dependencies(missing, blast_missing)
  return unless missing.length + blast_missing.length > 0

  puts "Dependencies are missing:"
  [missing, blast_missing].each do |group|
    group.each { |dep| puts " - #{dep.name} (#{dep.version})" }
  end

  puts "To install all missing dependencies, run:",
       " transrate --install-deps all",
       "If you only want the read-metrics dependencies:",
       " transrate --install-deps read",
       "Or if you only want the reference-metrics dependencies: ",
       " transrate --install-deps ref"

  exit 1
end
324
+
325
# Logs every key/value pair of +hash+ as one line of a fixed-width report.
#
# hash  - the metrics to print
# width - target line width; key and value are separated by enough spaces
#         to fill it (never fewer than zero)
# round - number of decimal places kept for fractional values (default 2)
#
# Underscores in keys are shown as spaces. A value is shown as a rounded
# float when, after rounding, it has a non-zero fractional part; other
# values are shown as they are. Returns the results of the logger calls.
def pretty_print_hash(hash, width, round=2)
  hash.map do |key, value|
    # show as float if there are any decimal places
    as_float = value.to_f.round(round)
    value = as_float if as_float.to_s.split('.').last.to_i > 0
    value = value.round(round) if value.is_a?(Float)

    label = key.to_s
    gap = [width - (label.length + value.to_s.length), 0].max
    logger.info "#{label.split('_').join(' ')}#{' ' * gap}#{value}"
  end
end
341
+
342
# Concatenates several FASTA assemblies into the file named by
# --merge-assemblies.
#
# Every contig is renamed "<assembly file name without extension>:<entry
# id>"; when two contigs end up with the same name, the later one replaces
# the earlier. Returns the path of the merged file as given in the option.
def concatenate_assemblies(assemblies)
  merged_file = @opts.merge_assemblies
  merged = {}

  assemblies.each do |file|
    source = File.basename(file, File.extname(file))
    Bio::FastaFormat.open(file).each do |entry|
      merged["#{source}:#{entry.entry_id}"] = entry.seq
    end
  end

  logger.info "Merging assemblies into one file...'#{merged_file}'"
  File.open(merged_file, "wb") do |out|
    merged.each_pair { |name, seq| out.write ">#{name}\n#{seq}\n" }
  end

  merged_file
end
361
+
362
# Runs every applicable analysis on one assembly.
#
# assembly    - path of the assembly FASTA file
# r           - the reference Assembly, or nil when none was given
# result_path - directory (created if needed) that receives this
#               assembly's per-contig output
#
# Contig metrics are always computed; read and comparative metrics are
# delegated to their own methods; the assembly score is computed only
# when both --left and --right were given (otherwise score, optimal score
# and cutoff are reported as "NA"). Returns one hash combining all metric
# groups with the :assembly, :score, :optimal_score and :cutoff entries.
def analyse_assembly(assembly, r, result_path)
  logger.info "Loading assembly: #{assembly}"
  a = Assembly.new(assembly)

  logger.info "Analysing assembly: #{assembly}"
  logger.info "Results will be saved in #{File.expand_path result_path}"

  contig_results, read_results, comparative_results = {}, {}, {}
  score, optimal, cutoff = Array.new(3) { "NA" }

  FileUtils.mkdir_p(result_path)
  Dir.chdir(result_path) do
    transrater = Transrater.new(a, r, threads: @opts.threads)

    contig_results = contig_metrics(transrater)
    read_results = read_metrics(transrater)
    comparative_results = comparative_metrics(transrater)
    if @opts.left && @opts.right
      score, optimal, cutoff = assembly_score(assembly, transrater)
    end

    write_contig_csv(a)
  end

  summary = contig_results.merge(read_results).merge(comparative_results)
  summary.merge(:assembly => assembly,
                :score => score,
                :optimal_score => optimal,
                :cutoff => cutoff)
end # analyse_assembly
396
+
397
# Chooses a result directory name for each assembly.
#
# A single assembly is named after its file without the extension. For
# several assemblies the directory part they all share is removed from
# each absolute path and the remainder is turned into a name by
# assembly_result_path, so files with the same name in different
# directories still get distinct result directories.
#
# assemblies - Array of assembly file paths
#
# Returns an Array of names in the same order as +assemblies+ (empty for
# an empty list).
def assembly_result_paths(assemblies)
  return [] if assemblies.empty?

  if assemblies.length == 1
    only = assemblies.first
    return [File.basename(only, File.extname(only))]
  end

  paths = assemblies.map { |a| File.expand_path a }
  common_prefix = common_directory_path paths

  paths.map do |path|
    # File.expand_path returns Strings, so work on them directly, and cut
    # the shared prefix only from the front: replacing every occurrence
    # would also eat matching text later in the path.
    relative = path.start_with?(common_prefix) ? path[common_prefix.length..-1] : path
    assembly_result_path relative.sub(%r{\A/}, "")
  end
end
406
+
407
# Turns an assembly path into a flat name: every path separator becomes an
# underscore and the file extension is removed.
def assembly_result_path(assembly)
  flattened = assembly.split(File::SEPARATOR, -1).join('_')
  extension = File.extname(flattened)
  File.basename(flattened, extension)
end
411
+
412
# Returns the leading run of path components shared by all of +dirs+,
# joined back together with the path separator.
#
# Only the smallest and largest path in sort order are compared,
# component by component, up to the first position where they differ.
def common_directory_path(dirs)
  lowest, highest = dirs.minmax.map { |dir| dir.split(File::SEPARATOR) }

  shared = []
  lowest.zip(highest) do |low_part, high_part|
    break unless low_part == high_part
    shared << low_part
  end

  shared.join(File::SEPARATOR)
end
417
+
418
# Writes one row per analysed assembly to assemblies.csv in the current
# directory.
#
# results - Array of result hashes, one per assembly
#
# The column set is taken from the first result, with :assembly moved to
# the front. Float values are rounded to 5 decimal places.
def write_assembly_csv(results)
  outfile = "assemblies.csv"
  logger.info "Writing analysis results to #{outfile}"

  CSV.open(outfile, 'wb') do |csv|
    columns = [:assembly] + (results[0].keys - [:assembly])
    csv << columns

    results.each do |row|
      cells = columns.map do |column|
        cell = row[column]
        if cell.is_a?(Float) then cell.round(5) else cell end
      end
      csv << cells
    end
  end
end # write_assembly_csv
438
+
439
# Computes the contig-level metrics for the assembly held by +transrater+,
# logs them as a report and returns them.
#
# The basic statistics hash is extended in place with the contig metrics
# results. The elapsed time is logged in whole seconds.
def contig_metrics(transrater)
  logger.info "Calculating contig metrics..."
  started = Time.now

  contig_results = transrater.assembly_metrics.basic_stats
  contig_results.merge!(transrater.assembly.contig_metrics.results)

  if contig_results
    logger.info "Contig metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(contig_results, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Contig metrics done in #{elapsed} seconds"
  contig_results
end
454
+
455
# Computes the read-mapping metrics when both --left and --right were
# given, logs them as a report and returns them.
#
# Returns an empty hash (after logging that the step is skipped) when
# reads were not provided.
def read_metrics(transrater)
  unless @opts.left && @opts.right
    logger.info "No reads provided, skipping read diagnostics"
    return {}
  end

  logger.info "Calculating read diagnostics..."
  started = Time.now
  read_results = transrater.read_metrics(@opts.left, @opts.right).read_stats

  if read_results
    logger.info "Read mapping metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(read_results, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Read metrics done in #{elapsed} seconds"
  read_results
end
474
+
475
# Computes the reference-based (comparative) metrics when --reference was
# given, logs them as a report and returns them.
#
# Returns an empty hash (after logging that the step is skipped) when no
# reference was provided.
def comparative_metrics(transrater)
  unless @opts.reference
    logger.info "No reference provided, skipping comparative diagnostics"
    return {}
  end

  logger.info "Calculating comparative metrics..."
  started = Time.now
  comparative_results = transrater.comparative_metrics.comp_stats

  if comparative_results
    logger.info "Comparative metrics:"
    logger.info "-" * @report_width
    pretty_print_hash(comparative_results, @report_width)
  end

  elapsed = (Time.now - started).round
  logger.info "Comparative metrics done in #{elapsed} seconds"

  logger.info "-" * @report_width
  comparative_results
end
496
+
497
# Fetches the assembly score plus the optimal score and cutoff from
# +transrater+, logging them (and the good-contig summary) when a score
# is available.
#
# assembly   - path of the assembly; its file name is passed as the prefix
#              to the optimal-score calculation
# transrater - the Transrater for this assembly
#
# Returns [score, optimal, cutoff].
def assembly_score(assembly, transrater)
  score = transrater.assembly_score
  optimal, cutoff = transrater.assembly_optimal_score(File.basename(assembly))
  scores = [score, optimal, cutoff]
  return scores if score.nil?

  pretty_print_hash({ :TRANSRATE_ASSEMBLY_SCORE => score }, @report_width, 4)
  logger.info "-" * @report_width
  pretty_print_hash({ :TRANSRATE_OPTIMAL_SCORE => optimal }, @report_width, 4)
  pretty_print_hash({ :TRANSRATE_OPTIMAL_CUTOFF => cutoff }, @report_width, 4)
  pretty_print_hash(transrater.good_contigs, @report_width)
  scores
end
514
+
515
# Writes the per-contig metrics of assembly +a+ to contigs.csv in the
# current directory, one row per contig.
#
# Every row starts with the contig name and its basic metrics; comparative
# metrics are appended when --reference was given and read metrics when
# both --left and --right were given. The header is taken from the first
# contig's columns. Float values are rounded to 6 decimal places.
def write_contig_csv(a)
  outfile = File.expand_path("contigs.csv")
  logger.info "Writing contig metrics for each contig to #{outfile}"
  # have option to turn off, default on
  header_written = false

  CSV.open(outfile, 'wb') do |csv|
    a.each do |name, contig|
      row = { :contig_name => name }.merge(contig.basic_metrics)
      row.merge!(contig.comparative_metrics) if @opts.reference
      row.merge!(contig.read_metrics) if @opts.left && @opts.right

      unless header_written
        csv << row.keys
        header_written = true
      end

      csv << row.values.map { |value| value.is_a?(Float) ? value.round(6) : value }
    end
  end
end
540
+
541
+ end # Cmdline
542
+
543
+ end # Transrate