transrate 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (53)
  1. checksums.yaml +4 -4
  2. data/.travis.yml +17 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -1
  5. data/bin/transrate +2 -375
  6. data/docs/transrate_logo_full.png +0 -0
  7. data/files.txt +78 -0
  8. data/lib/transrate.rb +1 -0
  9. data/lib/transrate/assembly.rb +4 -0
  10. data/lib/transrate/cmdline.rb +543 -0
  11. data/lib/transrate/comparative_metrics.rb +3 -0
  12. data/lib/transrate/snap.rb +10 -0
  13. data/lib/transrate/version.rb +1 -1
  14. data/packaging/build_deps_linux.sh +57 -0
  15. data/packaging/build_deps_macosx.sh +55 -0
  16. data/packaging/bundler-config +3 -0
  17. data/packaging/minify.sh +43 -0
  18. data/packaging/transrate +20 -0
  19. data/test/data/test_contig_nc1.fa +2 -0
  20. data/test/data/test_contig_nc2.fa +4 -0
  21. data/test/data/test_contig_nc3.fa +6 -0
  22. data/test/data/test_contig_nc4.fa +4 -0
  23. data/test/data/test_contig_nc5.fa +6 -0
  24. data/test/data/test_contig_nc6.fa +2 -0
  25. data/test/data/test_contig_nc7.fa +2 -0
  26. data/test/data/test_reference_aa1.fa +2 -0
  27. data/test/data/test_reference_nc1.fa +2 -0
  28. data/test/helper.rb +35 -6
  29. data/test/test_assembly.rb +5 -3
  30. data/test/test_cmd.rb +1 -1
  31. data/test/test_cmdline.rb +114 -0
  32. data/test/test_comp_metrics.rb +2 -2
  33. data/test/test_contig.rb +1 -1
  34. data/test/test_contig_metrics.rb +1 -1
  35. data/test/test_inline.rb +1 -1
  36. data/test/test_optimiser.rb +16 -16
  37. data/test/test_read_metrics.rb +1 -1
  38. data/test/test_salmon.rb +1 -1
  39. data/test/test_snap.rb +1 -1
  40. data/test/test_transrate.rb +1 -1
  41. data/test/test_transrater.rb +2 -2
  42. data/test/vagrant/centos_6.5_64/Vagrantfile +122 -0
  43. data/test/vagrant/debian_7.4_64/Vagrantfile +126 -0
  44. data/test/vagrant/debian_7.4_64/provision.sh +28 -0
  45. data/test/vagrant/fedora_20_64/Vagrantfile +122 -0
  46. data/test/vagrant/fedora_20_64/provision.sh +16 -0
  47. data/test/vagrant/linux-x86_64/Vagrantfile +10 -0
  48. data/test/vagrant/osx/Vagrantfile +18 -0
  49. data/test/vagrant/ubuntu_12.04_64/Vagrantfile +126 -0
  50. data/test/vagrant/ubuntu_12.04_64/provision.sh +24 -0
  51. data/transrate.gemspec +2 -2
  52. metadata +38 -17
  53. data/test/test_bin.rb +0 -139
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1cdc0e54bd5a77cc14727d6c42e739e6c9fa4d10
4
- data.tar.gz: ee950d7fcd11d5662a7012c1aeefb597dfe7dd4a
3
+ metadata.gz: d6bdeb8a50ecd17a0178e45db4bf4f927634cdc0
4
+ data.tar.gz: aaf1a27b63ef7e5a8388cbf52268acb50a362672
5
5
  SHA512:
6
- metadata.gz: aa61ac533468fdcd3dbca9949996a4d6c62dcf540fd8f0f7194251b9b88415e28f9c54e884586853ec12244e4778d4bc840a396be3c85c55acb09f71a0d77e4d
7
- data.tar.gz: f55918c002f429f1d4912feca88b91e7647bb4064348e254860673d8d9d52bd8bad512b3f7e23e166ad87defbe67de51eeb043620a43dc51eed61e4d40aeece1
6
+ metadata.gz: d3c2c0c0dbba505ed8ca05b3136cf531495b246cfa0ba6b378e8651c3764d0673282668bf6a658bda148cc2139107fabc83540749b474529e74ae6b3862a9f0d
7
+ data.tar.gz: 53f1676cf1c5cc56d44a1cb5fcc58bd4137486292e379a959e49931121f1d8f83faa10b3ba24f9665bf7a9595a92e53908a39948e9d0f7a481dbaef128d9f9f7
data/.travis.yml ADDED
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.0.0"
4
+ - "2.1.0"
5
+ - "2.2.0"
6
+ before_script:
7
+ - gem install bindeps
8
+ - bundle install
9
+ - bundle exec rake compile
10
+ - bundle exec bin/transrate --install-deps all
11
+ os:
12
+ - linux
13
+ - osx
14
+ matrix:
15
+ allow_failures:
16
+ - os: osx
17
+ - rvm: "2.2.0"
data/README.md CHANGED
@@ -28,7 +28,7 @@ This software is being actively developed. Please be aware that there may be bug
28
28
 
29
29
  ## Citation
30
30
 
31
- Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.svg)](http://dx.doi.org/10.5281/zenodo.18325).
31
+ Transrate is pre-publication academic software. If you use it, please cite [the preprint on biorXiv](http://dx.doi.org/10.1101/021626).
32
32
 
33
33
  ## Documentation
34
34
 
data/Rakefile CHANGED
@@ -90,7 +90,7 @@ task :default => :test
90
90
  # PACKAGING
91
91
 
92
92
  PACKAGE_NAME = "transrate"
93
- VERSION = "1.0.0"
93
+ VERSION = "1.0.1"
94
94
  TRAVELING_RUBY_VERSION = "20150210-2.2.0"
95
95
 
96
96
  desc "Package your app"
data/bin/transrate CHANGED
@@ -1,16 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
-
3
- require 'trollop'
4
2
  require 'transrate'
5
- require 'csv'
6
- require 'bindeps'
7
- require 'colorize'
8
-
9
3
  include Transrate
10
4
 
11
- # Show the help message if no arguments provided
12
- ARGV[0] = "--help" if ARGV.length() == 0
13
-
14
5
  # We want clean error messages through the logger, no ugly backtraces
15
6
  # because the user doesn't care about them, unless they specifically ask for
16
7
  # them with --loglevel debug
@@ -28,369 +19,5 @@ module Kernel
28
19
  end
29
20
  end
30
21
 
31
- txp = '░▓▓▓^▓▓▓░'
32
- toptxp = txp.green
33
- midtxp = txp.yellow
34
- bottxp = txp.red
35
-
36
- opts = Trollop::options do
37
- version Transrate::VERSION::STRING.dup
38
- banner <<-EOS
39
- _ _
40
- | |_ _ __ __ _ _ __ ___ _ __ __ _ | |_ ___
41
- #{toptxp} | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{toptxp}
42
- #{midtxp} | |_ | | | (_| || | | |\\__ \\| | | (_| || |_| __/ #{midtxp}
43
- #{bottxp} \\__||_| \\__,_||_| |_||___/|_| \\__,_| \\__|\\___| #{bottxp}
44
-
45
- Transrate v#{Transrate::VERSION::STRING.dup}
46
- by Richard Smith-Unna, Chris Boursnell, Rob Patro,
47
- Julian Hibberd, and Steve Kelly
48
-
49
- DESCRIPTION:
50
- Analyse a de-novo transcriptome assembly using three kinds of metrics:
51
-
52
- 1. sequence based (if --assembly is given)
53
- 2. read mapping based (if --left and --right are given)
54
- 3. reference based (if --reference is given)
55
-
56
- Documentation at http://hibberdlab.com/transrate
57
-
58
- USAGE:
59
- transrate <options>
60
-
61
- OPTIONS:
62
-
63
- EOS
64
- opt :assembly, "Assembly file(s) in FASTA format, comma-separated",
65
- :type => String
66
- opt :left, "Left reads file in FASTQ format",
67
- :type => String
68
- opt :right, "Right reads file in FASTQ format",
69
- :type => String
70
- opt :reference, "Reference proteome or transcriptome file in FASTA format",
71
- :type => String
72
- opt :threads, "Number of threads to use",
73
- :default => 8,
74
- :type => Integer
75
- opt :merge_assemblies, "Merge best contigs from multiple assemblies into file",
76
- :type => String
77
- opt :outfile, "Prefix filename to use for CSV output",
78
- :default => 'transrate'
79
- opt :loglevel, "Log level. " +
80
- "One of [error, info, warn, debug]",
81
- :default => 'info'
82
- opt :install_deps, "Install any missing dependencies. One of [all, read, ref]",
83
- :type => String, :default => nil
84
- opt :examples, "Show some example commands with explanations"
85
- end
86
-
87
- if opts.examples
88
- puts <<-EOS
89
-
90
- Transrate v#{Transrate::VERSION::STRING.dup}
91
-
92
- EXAMPLE COMMANDS:
93
-
94
- # check dependencies and install any that are missing
95
- transrate --install-deps
96
-
97
- # get the transrate score for the assembly and each contig
98
- transrate --assembly contigs.fa --left left.fq --right right.fq
99
-
100
- # basic assembly metrics only
101
- transrate --assembly contigs.fa
102
-
103
- # basic and reference-based metrics with 8 threads
104
- transrate --assembly contigs.fa --reference ref.fa --threads 8
105
-
106
- # contig and read-based metrics for two assemblies with 32 threads
107
- transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
108
-
109
- EOS
110
- exit(0)
111
- end
112
-
113
- # Check dependencies if they are relevant to the command issued,
114
- # and handle any commands to install missing ones
115
- gem_dir = Gem.loaded_specs['transrate'].full_gem_path
116
- gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
117
- blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
118
-
119
- deps, read_deps, ref_deps = nil
120
- unless opts.install_deps.nil?
121
-
122
- unless %w[all read ref].include? opts.install_deps
123
- raise TransrateError.new "install-deps #{opts.install_deps} is not valid. " +
124
- "You must specify one of: all, read, ref."
125
- end
126
-
127
- deps = opts.install_deps == 'all'
128
- read_deps = opts.install_deps == 'read'
129
- ref_deps = opts.install_deps == 'ref'
130
- end
131
-
132
- if deps || read_deps || ref_deps
133
- # user has requested dependency installation
134
- puts "Checking dependencies"
135
-
136
- missing = []
137
- if deps || read_deps
138
- Bindeps.require gem_deps
139
- missing += Bindeps.missing gem_deps
140
- end
141
-
142
- if deps || ref_deps
143
- Bindeps.require blast_dep
144
- missing += Bindeps.missing blast_dep
145
- end
146
-
147
- unless missing.empty?
148
- list = missing.collect {|i| "#{i.name}:#{i.version}"}.join("\n - ")
149
- msg = "Failed to install: \n - #{list}"
150
- raise TransrateError.new msg
151
- end
152
-
153
- puts "All dependencies installed"
154
- exit
155
-
156
- else
157
- # no dependency installation requested, but check dependencies
158
- # for the commands provided are installed
159
- missing = []
160
- missing = Bindeps.missing gem_deps if opts.left
161
- blast_missing = []
162
- blast_missing = Bindeps.missing blast_dep if opts.reference
163
-
164
- if missing.length + blast_missing.length > 0
165
- puts "Dependencies are missing:"
166
-
167
- missing.each do |dep|
168
- puts " - #{dep.name} (#{dep.version})"
169
- end
170
-
171
- blast_missing.each do |dep|
172
- puts " - #{dep.name} (#{dep.version})"
173
- end
174
-
175
- puts "To install all missing dependencies, run:"
176
- puts " transrate --install-deps a;;"
177
- puts "If you only want the read-metrics dependencies:"
178
- puts " transrate --install-deps read"
179
- puts "Or if you only want the reference-metrics dependencies: "
180
- puts " transrate --install-deps ref"
181
-
182
- exit 1
183
- end
184
-
185
- end
186
-
187
- # Handle commands
188
- unless %w[error info warn debug].include? opts.loglevel
189
- raise TransrateError.new "Loglevel #{opts.loglevel} is not valid. " +
190
- "It must be one of: error, info, warn, debug."
191
- end
192
-
193
- logger.level = Yell::Level.new opts.loglevel.to_sym
194
-
195
- if opts.assembly
196
- opts.assembly.split(',').each do |assembly_file|
197
- unless File.exist?(assembly_file)
198
- raise TransrateIOError.new "Assembly fasta file does not exist: " +
199
- " #{assembly_file}"
200
- end
201
- end
202
- else
203
- raise TransrateArgError.new "Option --assembly must be specified. " +
204
- "Try --help for help."
205
- end
206
-
207
- if opts.reference && !File.exist?(opts.reference)
208
- raise TransrateIOError.new "Reference fasta file does not exist: " +
209
- " #{opts.reference}"
210
- end
211
-
212
- if opts.left and opts.right
213
- if opts.left.split(",").length != opts.right.split(",").length
214
- msg = "Please provide the same number of left reads as right reads"
215
- raise TransrateArgError.new msg
216
- end
217
- opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
218
- if !File.exist?(left)
219
- raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
220
- end
221
- if !File.exist?(right)
222
- raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
223
- end
224
- end
225
- end
226
-
227
- def pretty_print_hash hash, width, round=2
228
- hash.map do |k, v|
229
- # show as float if there are any decimal places
230
- if v.to_f.round(round).to_s.split('.').last.to_i > 0
231
- v = v.to_f.round(round)
232
- end
233
- if v.is_a? Float
234
- v = v.round(round)
235
- end
236
- pad = (width - (k.to_s.length + v.to_s.length))
237
- pad = [pad, 0].max
238
- logger.info "#{k.to_s.split('_').join(' ')}" +
239
- "#{" " * pad}" +
240
- "#{v}"
241
- end
242
- end
243
-
244
- r = opts.reference ? Assembly.new(opts.reference) : nil
245
- report_width = 35
246
-
247
- # loop through the assemblies, storing their outputs in an array of hashes
248
- all = []
249
-
250
- assemblies=opts.assembly
251
- if opts.merge_assemblies
252
- merged_file = opts.merge_assemblies
253
- merged = {}
254
- assemblies.split(",").each do |file|
255
- Bio::FastaFormat.open(file).each do |entry|
256
- contig_name = "#{File.basename(file,File.extname(file))}:"
257
- contig_name << "#{entry.entry_id}"
258
- merged[contig_name] = entry.seq
259
- end
260
- end
261
- logger.info "Merging assemblies into one file...'#{merged_file}'"
262
- File.open(merged_file, "wb") do |out|
263
- merged.each do |name, seq|
264
- out.write ">#{name}\n"
265
- out.write "#{seq}\n"
266
- end
267
- end
268
-
269
- assemblies = merged_file
270
- end
271
-
272
- assemblies.split(',').each do |assembly|
273
-
274
- logger.info "Loading assembly: #{assembly}"
275
-
276
- a = Assembly.new assembly
277
- transrater = Transrater.new(a, r, threads: opts.threads)
278
-
279
- logger.info "Analysing assembly: #{assembly}"
280
-
281
- contig_results = {}
282
-
283
- logger.info "Calculating contig metrics..."
284
- t0 = Time.now
285
- contig_results = transrater.assembly_metrics.basic_stats
286
- contig_results.merge! transrater.assembly.contig_metrics.results
287
- if contig_results
288
- logger.info "Contig metrics:"
289
- logger.info "-" * report_width
290
- pretty_print_hash(contig_results, report_width)
291
- end
292
-
293
- logger.info "Contig metrics done in #{(Time.now - t0).round} seconds"
294
-
295
- read_results = {}
296
-
297
- if (opts.left && opts.right)
298
- logger.info "Calculating read diagnostics..."
299
- t0 = Time.now
300
- read_results = transrater.read_metrics(opts.left, opts.right).read_stats
301
-
302
- if read_results
303
- logger.info "Read mapping metrics:"
304
- logger.info "-" * report_width
305
- pretty_print_hash(read_results, report_width)
306
- end
307
-
308
- logger.info "Read metrics done in #{(Time.now - t0).round} seconds"
309
- else
310
- logger.info "No reads provided, skipping read diagnostics"
311
- end
312
-
313
- comparative_results={}
314
-
315
- if opts.reference
316
- logger.info "Calculating comparative metrics..."
317
- t0 = Time.now
318
- comparative_metrics = transrater.comparative_metrics
319
- comparative_results = comparative_metrics.comp_stats
320
-
321
- if comparative_results
322
- logger.info "Comparative metrics:"
323
- logger.info "-" * report_width
324
- pretty_print_hash(comparative_results, report_width)
325
- end
326
-
327
- logger.info "Comparative metrics done in #{(Time.now - t0).round} seconds"
328
-
329
- logger.info "-" * report_width
330
- else
331
- logger.info "No reference provided, skipping comparative diagnostics"
332
- end
333
-
334
- prefix = "#{opts.outfile}_#{File.basename(assembly)}"
335
-
336
- if (opts.left && opts.right)
337
- score = transrater.assembly_score
338
-
339
- optimal, cutoff = transrater.assembly_optimal_score prefix
340
- unless score.nil?
341
- pretty_print_hash({:TRANSRATE_ASSEMBLY_SCORE => score}, report_width, 4)
342
- logger.info "-" * report_width
343
- pretty_print_hash({:TRANSRATE_OPTIMAL_SCORE => optimal}, report_width, 4)
344
- pretty_print_hash({:TRANSRATE_OPTIMAL_CUTOFF => cutoff}, report_width, 4)
345
- pretty_print_hash(transrater.good_contigs, report_width)
346
- end
347
- end
348
-
349
- # write contig metrics to file for each contig
350
- outfile = "#{prefix}_contigs.csv"
351
- logger.info "Writing contig metrics for each contig to #{outfile}"
352
- # have option to turn off, default on
353
- first=true
354
- CSV.open(outfile, 'wb') do |csv|
355
- a.each do |name, contig|
356
- basic_metrics = {:contig_name => name}.merge(contig.basic_metrics)
357
- if opts.reference
358
- comp_metrics = contig.comparative_metrics
359
- basic_metrics.merge!(comp_metrics)
360
- end
361
- if opts.left and opts.right
362
- read_metrics = contig.read_metrics
363
- basic_metrics.merge!(read_metrics)
364
- end
365
- if first
366
- csv << basic_metrics.keys
367
- first = false
368
- end
369
- csv << basic_metrics.values.map{ |x| x.is_a?(Float) ? x.round(6) : x }
370
- end
371
- end
372
-
373
- all << contig_results.merge(read_results)
374
- .merge(comparative_results)
375
- .merge({ :assembly => assembly })
376
- .merge({ :score => score })
377
- .merge({ :optimal_score => optimal })
378
- .merge({ :cutoff => cutoff })
379
-
380
- end
381
-
382
- # write out all resuls to .csv
383
- outfile = "#{opts.outfile}_assemblies.csv"
384
- logger.info "Writing analysis results to #{outfile}"
385
- CSV.open(outfile, 'wb') do |file|
386
- keys = all[0].keys
387
- keys.delete(:assembly)
388
- head = [:assembly] + keys
389
- file << head
390
- all.each do |row|
391
- file << head.map { |x|
392
- entry = row[x]
393
- entry.is_a?(Float) ? entry.round(5) : entry
394
- }
395
- end
396
- end
22
+ cmdline = Cmdline.new ARGV
23
+ cmdline.run
Binary file (data/docs/transrate_logo_full.png)
data/files.txt ADDED
@@ -0,0 +1,78 @@
1
+ .gitignore
2
+ .travis.yml
3
+ CITATION
4
+ Gemfile
5
+ LICENSE
6
+ README.md
7
+ Rakefile
8
+ bin/transrate
9
+ deps/blast.yaml
10
+ deps/deps.yaml
11
+ docs/transrate_logo_full.png
12
+ ext/transrate/extconf.rb
13
+ ext/transrate/transrate.c
14
+ files.txt
15
+ lib/transrate.rb
16
+ lib/transrate/assembly.rb
17
+ lib/transrate/cmd.rb
18
+ lib/transrate/cmdline.rb
19
+ lib/transrate/comparative_metrics.rb
20
+ lib/transrate/contig.rb
21
+ lib/transrate/contig_metrics.rb
22
+ lib/transrate/read_metrics.rb
23
+ lib/transrate/salmon.rb
24
+ lib/transrate/score_optimiser.rb
25
+ lib/transrate/snap.rb
26
+ lib/transrate/transrater.rb
27
+ lib/transrate/version.rb
28
+ lib/transrate/writer.rb
29
+ packaging/build_deps_linux.sh
30
+ packaging/build_deps_macosx.sh
31
+ packaging/bundler-config
32
+ packaging/minify.sh
33
+ packaging/transrate
34
+ test/data/150uncovered.l.fq
35
+ test/data/150uncovered.r.fq
36
+ test/data/Os.protein.2.fa
37
+ test/data/Os.protein.fa
38
+ test/data/assembly.2.fa
39
+ test/data/assembly.fasta
40
+ test/data/bridging_reads.l.fastq
41
+ test/data/bridging_reads.r.fastq
42
+ test/data/sorghum_100.fa
43
+ test/data/sorghum_transcript.fa
44
+ test/data/test.sf
45
+ test/data/test_contig_nc1.fa
46
+ test/data/test_contig_nc2.fa
47
+ test/data/test_contig_nc3.fa
48
+ test/data/test_contig_nc4.fa
49
+ test/data/test_contig_nc5.fa
50
+ test/data/test_contig_nc6.fa
51
+ test/data/test_contig_nc7.fa
52
+ test/data/test_reference_aa1.fa
53
+ test/data/test_reference_nc1.fa
54
+ test/data/tiny.sam
55
+ test/helper.rb
56
+ test/test_assembly.rb
57
+ test/test_cmd.rb
58
+ test/test_cmdline.rb
59
+ test/test_comp_metrics.rb
60
+ test/test_contig.rb
61
+ test/test_contig_metrics.rb
62
+ test/test_inline.rb
63
+ test/test_optimiser.rb
64
+ test/test_read_metrics.rb
65
+ test/test_salmon.rb
66
+ test/test_snap.rb
67
+ test/test_transrate.rb
68
+ test/test_transrater.rb
69
+ test/vagrant/centos_6.5_64/Vagrantfile
70
+ test/vagrant/debian_7.4_64/Vagrantfile
71
+ test/vagrant/debian_7.4_64/provision.sh
72
+ test/vagrant/fedora_20_64/Vagrantfile
73
+ test/vagrant/fedora_20_64/provision.sh
74
+ test/vagrant/linux-x86_64/Vagrantfile
75
+ test/vagrant/osx/Vagrantfile
76
+ test/vagrant/ubuntu_12.04_64/Vagrantfile
77
+ test/vagrant/ubuntu_12.04_64/provision.sh
78
+ transrate.gemspec