transrate 1.0.0 → 1.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (53) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +17 -0
  3. data/README.md +1 -1
  4. data/Rakefile +1 -1
  5. data/bin/transrate +2 -375
  6. data/docs/transrate_logo_full.png +0 -0
  7. data/files.txt +78 -0
  8. data/lib/transrate.rb +1 -0
  9. data/lib/transrate/assembly.rb +4 -0
  10. data/lib/transrate/cmdline.rb +543 -0
  11. data/lib/transrate/comparative_metrics.rb +3 -0
  12. data/lib/transrate/snap.rb +10 -0
  13. data/lib/transrate/version.rb +1 -1
  14. data/packaging/build_deps_linux.sh +57 -0
  15. data/packaging/build_deps_macosx.sh +55 -0
  16. data/packaging/bundler-config +3 -0
  17. data/packaging/minify.sh +43 -0
  18. data/packaging/transrate +20 -0
  19. data/test/data/test_contig_nc1.fa +2 -0
  20. data/test/data/test_contig_nc2.fa +4 -0
  21. data/test/data/test_contig_nc3.fa +6 -0
  22. data/test/data/test_contig_nc4.fa +4 -0
  23. data/test/data/test_contig_nc5.fa +6 -0
  24. data/test/data/test_contig_nc6.fa +2 -0
  25. data/test/data/test_contig_nc7.fa +2 -0
  26. data/test/data/test_reference_aa1.fa +2 -0
  27. data/test/data/test_reference_nc1.fa +2 -0
  28. data/test/helper.rb +35 -6
  29. data/test/test_assembly.rb +5 -3
  30. data/test/test_cmd.rb +1 -1
  31. data/test/test_cmdline.rb +114 -0
  32. data/test/test_comp_metrics.rb +2 -2
  33. data/test/test_contig.rb +1 -1
  34. data/test/test_contig_metrics.rb +1 -1
  35. data/test/test_inline.rb +1 -1
  36. data/test/test_optimiser.rb +16 -16
  37. data/test/test_read_metrics.rb +1 -1
  38. data/test/test_salmon.rb +1 -1
  39. data/test/test_snap.rb +1 -1
  40. data/test/test_transrate.rb +1 -1
  41. data/test/test_transrater.rb +2 -2
  42. data/test/vagrant/centos_6.5_64/Vagrantfile +122 -0
  43. data/test/vagrant/debian_7.4_64/Vagrantfile +126 -0
  44. data/test/vagrant/debian_7.4_64/provision.sh +28 -0
  45. data/test/vagrant/fedora_20_64/Vagrantfile +122 -0
  46. data/test/vagrant/fedora_20_64/provision.sh +16 -0
  47. data/test/vagrant/linux-x86_64/Vagrantfile +10 -0
  48. data/test/vagrant/osx/Vagrantfile +18 -0
  49. data/test/vagrant/ubuntu_12.04_64/Vagrantfile +126 -0
  50. data/test/vagrant/ubuntu_12.04_64/provision.sh +24 -0
  51. data/transrate.gemspec +2 -2
  52. metadata +38 -17
  53. data/test/test_bin.rb +0 -139
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1cdc0e54bd5a77cc14727d6c42e739e6c9fa4d10
4
- data.tar.gz: ee950d7fcd11d5662a7012c1aeefb597dfe7dd4a
3
+ metadata.gz: d6bdeb8a50ecd17a0178e45db4bf4f927634cdc0
4
+ data.tar.gz: aaf1a27b63ef7e5a8388cbf52268acb50a362672
5
5
  SHA512:
6
- metadata.gz: aa61ac533468fdcd3dbca9949996a4d6c62dcf540fd8f0f7194251b9b88415e28f9c54e884586853ec12244e4778d4bc840a396be3c85c55acb09f71a0d77e4d
7
- data.tar.gz: f55918c002f429f1d4912feca88b91e7647bb4064348e254860673d8d9d52bd8bad512b3f7e23e166ad87defbe67de51eeb043620a43dc51eed61e4d40aeece1
6
+ metadata.gz: d3c2c0c0dbba505ed8ca05b3136cf531495b246cfa0ba6b378e8651c3764d0673282668bf6a658bda148cc2139107fabc83540749b474529e74ae6b3862a9f0d
7
+ data.tar.gz: 53f1676cf1c5cc56d44a1cb5fcc58bd4137486292e379a959e49931121f1d8f83faa10b3ba24f9665bf7a9595a92e53908a39948e9d0f7a481dbaef128d9f9f7
data/.travis.yml ADDED
@@ -0,0 +1,17 @@
1
+ language: ruby
2
+ rvm:
3
+ - "2.0.0"
4
+ - "2.1.0"
5
+ - "2.2.0"
6
+ before_script:
7
+ - gem install bindeps
8
+ - bundle install
9
+ - bundle exec rake compile
10
+ - bundle exec bin/transrate --install-deps all
11
+ os:
12
+ - linux
13
+ - osx
14
+ matrix:
15
+ allow_failures:
16
+ - os: osx
17
+ - rvm: "2.2.0"
data/README.md CHANGED
@@ -28,7 +28,7 @@ This software is being actively developed. Please be aware that there may be bug
28
28
 
29
29
  ## Citation
30
30
 
31
- Transrate is pre-publication academic software. If you use it, please cite the github repository and the DOI: [![DOI](https://zenodo.org/badge/3687/Blahah/transrate.svg)](http://dx.doi.org/10.5281/zenodo.18325).
31
+ Transrate is pre-publication academic software. If you use it, please cite [the preprint on biorXiv](http://dx.doi.org/10.1101/021626).
32
32
 
33
33
  ## Documentation
34
34
 
data/Rakefile CHANGED
@@ -90,7 +90,7 @@ task :default => :test
90
90
  # PACKAGING
91
91
 
92
92
  PACKAGE_NAME = "transrate"
93
- VERSION = "1.0.0"
93
+ VERSION = "1.0.1"
94
94
  TRAVELING_RUBY_VERSION = "20150210-2.2.0"
95
95
 
96
96
  desc "Package your app"
data/bin/transrate CHANGED
@@ -1,16 +1,7 @@
1
1
  #!/usr/bin/env ruby
2
-
3
- require 'trollop'
4
2
  require 'transrate'
5
- require 'csv'
6
- require 'bindeps'
7
- require 'colorize'
8
-
9
3
  include Transrate
10
4
 
11
- # Show the help message if no arguments provided
12
- ARGV[0] = "--help" if ARGV.length() == 0
13
-
14
5
  # We want clean error messages through the logger, no ugly backtraces
15
6
  # because the user doesn't care about them, unless they specifically ask for
16
7
  # them with --loglevel debug
@@ -28,369 +19,5 @@ module Kernel
28
19
  end
29
20
  end
30
21
 
31
- txp = '░▓▓▓^▓▓▓░'
32
- toptxp = txp.green
33
- midtxp = txp.yellow
34
- bottxp = txp.red
35
-
36
- opts = Trollop::options do
37
- version Transrate::VERSION::STRING.dup
38
- banner <<-EOS
39
- _ _
40
- | |_ _ __ __ _ _ __ ___ _ __ __ _ | |_ ___
41
- #{toptxp} | __|| '__|/ _` || '_ \\ / __|| '__|/ _` || __|/ _ \\ #{toptxp}
42
- #{midtxp} | |_ | | | (_| || | | |\\__ \\| | | (_| || |_| __/ #{midtxp}
43
- #{bottxp} \\__||_| \\__,_||_| |_||___/|_| \\__,_| \\__|\\___| #{bottxp}
44
-
45
- Transrate v#{Transrate::VERSION::STRING.dup}
46
- by Richard Smith-Unna, Chris Boursnell, Rob Patro,
47
- Julian Hibberd, and Steve Kelly
48
-
49
- DESCRIPTION:
50
- Analyse a de-novo transcriptome assembly using three kinds of metrics:
51
-
52
- 1. sequence based (if --assembly is given)
53
- 2. read mapping based (if --left and --right are given)
54
- 3. reference based (if --reference is given)
55
-
56
- Documentation at http://hibberdlab.com/transrate
57
-
58
- USAGE:
59
- transrate <options>
60
-
61
- OPTIONS:
62
-
63
- EOS
64
- opt :assembly, "Assembly file(s) in FASTA format, comma-separated",
65
- :type => String
66
- opt :left, "Left reads file in FASTQ format",
67
- :type => String
68
- opt :right, "Right reads file in FASTQ format",
69
- :type => String
70
- opt :reference, "Reference proteome or transcriptome file in FASTA format",
71
- :type => String
72
- opt :threads, "Number of threads to use",
73
- :default => 8,
74
- :type => Integer
75
- opt :merge_assemblies, "Merge best contigs from multiple assemblies into file",
76
- :type => String
77
- opt :outfile, "Prefix filename to use for CSV output",
78
- :default => 'transrate'
79
- opt :loglevel, "Log level. " +
80
- "One of [error, info, warn, debug]",
81
- :default => 'info'
82
- opt :install_deps, "Install any missing dependencies. One of [all, read, ref]",
83
- :type => String, :default => nil
84
- opt :examples, "Show some example commands with explanations"
85
- end
86
-
87
- if opts.examples
88
- puts <<-EOS
89
-
90
- Transrate v#{Transrate::VERSION::STRING.dup}
91
-
92
- EXAMPLE COMMANDS:
93
-
94
- # check dependencies and install any that are missing
95
- transrate --install-deps
96
-
97
- # get the transrate score for the assembly and each contig
98
- transrate --assembly contigs.fa --left left.fq --right right.fq
99
-
100
- # basic assembly metrics only
101
- transrate --assembly contigs.fa
102
-
103
- # basic and reference-based metrics with 8 threads
104
- transrate --assembly contigs.fa --reference ref.fa --threads 8
105
-
106
- # contig and read-based metrics for two assemblies with 32 threads
107
- transrate --assembly one.fa,two.fa --left l.fq --right r.fq --threads 32
108
-
109
- EOS
110
- exit(0)
111
- end
112
-
113
- # Check dependencies if they are relevant to the command issued,
114
- # and handle any commands to install missing ones
115
- gem_dir = Gem.loaded_specs['transrate'].full_gem_path
116
- gem_deps = File.join(gem_dir, 'deps', 'deps.yaml')
117
- blast_dep = File.join(gem_dir, 'deps', 'blast.yaml')
118
-
119
- deps, read_deps, ref_deps = nil
120
- unless opts.install_deps.nil?
121
-
122
- unless %w[all read ref].include? opts.install_deps
123
- raise TransrateError.new "install-deps #{opts.install_deps} is not valid. " +
124
- "You must specify one of: all, read, ref."
125
- end
126
-
127
- deps = opts.install_deps == 'all'
128
- read_deps = opts.install_deps == 'read'
129
- ref_deps = opts.install_deps == 'ref'
130
- end
131
-
132
- if deps || read_deps || ref_deps
133
- # user has requested dependency installation
134
- puts "Checking dependencies"
135
-
136
- missing = []
137
- if deps || read_deps
138
- Bindeps.require gem_deps
139
- missing += Bindeps.missing gem_deps
140
- end
141
-
142
- if deps || ref_deps
143
- Bindeps.require blast_dep
144
- missing += Bindeps.missing blast_dep
145
- end
146
-
147
- unless missing.empty?
148
- list = missing.collect {|i| "#{i.name}:#{i.version}"}.join("\n - ")
149
- msg = "Failed to install: \n - #{list}"
150
- raise TransrateError.new msg
151
- end
152
-
153
- puts "All dependencies installed"
154
- exit
155
-
156
- else
157
- # no dependency installation requested, but check dependencies
158
- # for the commands provided are installed
159
- missing = []
160
- missing = Bindeps.missing gem_deps if opts.left
161
- blast_missing = []
162
- blast_missing = Bindeps.missing blast_dep if opts.reference
163
-
164
- if missing.length + blast_missing.length > 0
165
- puts "Dependencies are missing:"
166
-
167
- missing.each do |dep|
168
- puts " - #{dep.name} (#{dep.version})"
169
- end
170
-
171
- blast_missing.each do |dep|
172
- puts " - #{dep.name} (#{dep.version})"
173
- end
174
-
175
- puts "To install all missing dependencies, run:"
176
- puts " transrate --install-deps a;;"
177
- puts "If you only want the read-metrics dependencies:"
178
- puts " transrate --install-deps read"
179
- puts "Or if you only want the reference-metrics dependencies: "
180
- puts " transrate --install-deps ref"
181
-
182
- exit 1
183
- end
184
-
185
- end
186
-
187
- # Handle commands
188
- unless %w[error info warn debug].include? opts.loglevel
189
- raise TransrateError.new "Loglevel #{opts.loglevel} is not valid. " +
190
- "It must be one of: error, info, warn, debug."
191
- end
192
-
193
- logger.level = Yell::Level.new opts.loglevel.to_sym
194
-
195
- if opts.assembly
196
- opts.assembly.split(',').each do |assembly_file|
197
- unless File.exist?(assembly_file)
198
- raise TransrateIOError.new "Assembly fasta file does not exist: " +
199
- " #{assembly_file}"
200
- end
201
- end
202
- else
203
- raise TransrateArgError.new "Option --assembly must be specified. " +
204
- "Try --help for help."
205
- end
206
-
207
- if opts.reference && !File.exist?(opts.reference)
208
- raise TransrateIOError.new "Reference fasta file does not exist: " +
209
- " #{opts.reference}"
210
- end
211
-
212
- if opts.left and opts.right
213
- if opts.left.split(",").length != opts.right.split(",").length
214
- msg = "Please provide the same number of left reads as right reads"
215
- raise TransrateArgError.new msg
216
- end
217
- opts.left.split(",").zip(opts.right.split(",")).each do |left,right|
218
- if !File.exist?(left)
219
- raise TransrateIOError.new "Left read fastq file does not exist: #{left}"
220
- end
221
- if !File.exist?(right)
222
- raise TransrateIOError.new "Right read fastq file does not exist: #{right}"
223
- end
224
- end
225
- end
226
-
227
- def pretty_print_hash hash, width, round=2
228
- hash.map do |k, v|
229
- # show as float if there are any decimal places
230
- if v.to_f.round(round).to_s.split('.').last.to_i > 0
231
- v = v.to_f.round(round)
232
- end
233
- if v.is_a? Float
234
- v = v.round(round)
235
- end
236
- pad = (width - (k.to_s.length + v.to_s.length))
237
- pad = [pad, 0].max
238
- logger.info "#{k.to_s.split('_').join(' ')}" +
239
- "#{" " * pad}" +
240
- "#{v}"
241
- end
242
- end
243
-
244
- r = opts.reference ? Assembly.new(opts.reference) : nil
245
- report_width = 35
246
-
247
- # loop through the assemblies, storing their outputs in an array of hashes
248
- all = []
249
-
250
- assemblies=opts.assembly
251
- if opts.merge_assemblies
252
- merged_file = opts.merge_assemblies
253
- merged = {}
254
- assemblies.split(",").each do |file|
255
- Bio::FastaFormat.open(file).each do |entry|
256
- contig_name = "#{File.basename(file,File.extname(file))}:"
257
- contig_name << "#{entry.entry_id}"
258
- merged[contig_name] = entry.seq
259
- end
260
- end
261
- logger.info "Merging assemblies into one file...'#{merged_file}'"
262
- File.open(merged_file, "wb") do |out|
263
- merged.each do |name, seq|
264
- out.write ">#{name}\n"
265
- out.write "#{seq}\n"
266
- end
267
- end
268
-
269
- assemblies = merged_file
270
- end
271
-
272
- assemblies.split(',').each do |assembly|
273
-
274
- logger.info "Loading assembly: #{assembly}"
275
-
276
- a = Assembly.new assembly
277
- transrater = Transrater.new(a, r, threads: opts.threads)
278
-
279
- logger.info "Analysing assembly: #{assembly}"
280
-
281
- contig_results = {}
282
-
283
- logger.info "Calculating contig metrics..."
284
- t0 = Time.now
285
- contig_results = transrater.assembly_metrics.basic_stats
286
- contig_results.merge! transrater.assembly.contig_metrics.results
287
- if contig_results
288
- logger.info "Contig metrics:"
289
- logger.info "-" * report_width
290
- pretty_print_hash(contig_results, report_width)
291
- end
292
-
293
- logger.info "Contig metrics done in #{(Time.now - t0).round} seconds"
294
-
295
- read_results = {}
296
-
297
- if (opts.left && opts.right)
298
- logger.info "Calculating read diagnostics..."
299
- t0 = Time.now
300
- read_results = transrater.read_metrics(opts.left, opts.right).read_stats
301
-
302
- if read_results
303
- logger.info "Read mapping metrics:"
304
- logger.info "-" * report_width
305
- pretty_print_hash(read_results, report_width)
306
- end
307
-
308
- logger.info "Read metrics done in #{(Time.now - t0).round} seconds"
309
- else
310
- logger.info "No reads provided, skipping read diagnostics"
311
- end
312
-
313
- comparative_results={}
314
-
315
- if opts.reference
316
- logger.info "Calculating comparative metrics..."
317
- t0 = Time.now
318
- comparative_metrics = transrater.comparative_metrics
319
- comparative_results = comparative_metrics.comp_stats
320
-
321
- if comparative_results
322
- logger.info "Comparative metrics:"
323
- logger.info "-" * report_width
324
- pretty_print_hash(comparative_results, report_width)
325
- end
326
-
327
- logger.info "Comparative metrics done in #{(Time.now - t0).round} seconds"
328
-
329
- logger.info "-" * report_width
330
- else
331
- logger.info "No reference provided, skipping comparative diagnostics"
332
- end
333
-
334
- prefix = "#{opts.outfile}_#{File.basename(assembly)}"
335
-
336
- if (opts.left && opts.right)
337
- score = transrater.assembly_score
338
-
339
- optimal, cutoff = transrater.assembly_optimal_score prefix
340
- unless score.nil?
341
- pretty_print_hash({:TRANSRATE_ASSEMBLY_SCORE => score}, report_width, 4)
342
- logger.info "-" * report_width
343
- pretty_print_hash({:TRANSRATE_OPTIMAL_SCORE => optimal}, report_width, 4)
344
- pretty_print_hash({:TRANSRATE_OPTIMAL_CUTOFF => cutoff}, report_width, 4)
345
- pretty_print_hash(transrater.good_contigs, report_width)
346
- end
347
- end
348
-
349
- # write contig metrics to file for each contig
350
- outfile = "#{prefix}_contigs.csv"
351
- logger.info "Writing contig metrics for each contig to #{outfile}"
352
- # have option to turn off, default on
353
- first=true
354
- CSV.open(outfile, 'wb') do |csv|
355
- a.each do |name, contig|
356
- basic_metrics = {:contig_name => name}.merge(contig.basic_metrics)
357
- if opts.reference
358
- comp_metrics = contig.comparative_metrics
359
- basic_metrics.merge!(comp_metrics)
360
- end
361
- if opts.left and opts.right
362
- read_metrics = contig.read_metrics
363
- basic_metrics.merge!(read_metrics)
364
- end
365
- if first
366
- csv << basic_metrics.keys
367
- first = false
368
- end
369
- csv << basic_metrics.values.map{ |x| x.is_a?(Float) ? x.round(6) : x }
370
- end
371
- end
372
-
373
- all << contig_results.merge(read_results)
374
- .merge(comparative_results)
375
- .merge({ :assembly => assembly })
376
- .merge({ :score => score })
377
- .merge({ :optimal_score => optimal })
378
- .merge({ :cutoff => cutoff })
379
-
380
- end
381
-
382
- # write out all resuls to .csv
383
- outfile = "#{opts.outfile}_assemblies.csv"
384
- logger.info "Writing analysis results to #{outfile}"
385
- CSV.open(outfile, 'wb') do |file|
386
- keys = all[0].keys
387
- keys.delete(:assembly)
388
- head = [:assembly] + keys
389
- file << head
390
- all.each do |row|
391
- file << head.map { |x|
392
- entry = row[x]
393
- entry.is_a?(Float) ? entry.round(5) : entry
394
- }
395
- end
396
- end
22
+ cmdline = Cmdline.new ARGV
23
+ cmdline.run
Binary file
data/files.txt ADDED
@@ -0,0 +1,78 @@
1
+ .gitignore
2
+ .travis.yml
3
+ CITATION
4
+ Gemfile
5
+ LICENSE
6
+ README.md
7
+ Rakefile
8
+ bin/transrate
9
+ deps/blast.yaml
10
+ deps/deps.yaml
11
+ docs/transrate_logo_full.png
12
+ ext/transrate/extconf.rb
13
+ ext/transrate/transrate.c
14
+ files.txt
15
+ lib/transrate.rb
16
+ lib/transrate/assembly.rb
17
+ lib/transrate/cmd.rb
18
+ lib/transrate/cmdline.rb
19
+ lib/transrate/comparative_metrics.rb
20
+ lib/transrate/contig.rb
21
+ lib/transrate/contig_metrics.rb
22
+ lib/transrate/read_metrics.rb
23
+ lib/transrate/salmon.rb
24
+ lib/transrate/score_optimiser.rb
25
+ lib/transrate/snap.rb
26
+ lib/transrate/transrater.rb
27
+ lib/transrate/version.rb
28
+ lib/transrate/writer.rb
29
+ packaging/build_deps_linux.sh
30
+ packaging/build_deps_macosx.sh
31
+ packaging/bundler-config
32
+ packaging/minify.sh
33
+ packaging/transrate
34
+ test/data/150uncovered.l.fq
35
+ test/data/150uncovered.r.fq
36
+ test/data/Os.protein.2.fa
37
+ test/data/Os.protein.fa
38
+ test/data/assembly.2.fa
39
+ test/data/assembly.fasta
40
+ test/data/bridging_reads.l.fastq
41
+ test/data/bridging_reads.r.fastq
42
+ test/data/sorghum_100.fa
43
+ test/data/sorghum_transcript.fa
44
+ test/data/test.sf
45
+ test/data/test_contig_nc1.fa
46
+ test/data/test_contig_nc2.fa
47
+ test/data/test_contig_nc3.fa
48
+ test/data/test_contig_nc4.fa
49
+ test/data/test_contig_nc5.fa
50
+ test/data/test_contig_nc6.fa
51
+ test/data/test_contig_nc7.fa
52
+ test/data/test_reference_aa1.fa
53
+ test/data/test_reference_nc1.fa
54
+ test/data/tiny.sam
55
+ test/helper.rb
56
+ test/test_assembly.rb
57
+ test/test_cmd.rb
58
+ test/test_cmdline.rb
59
+ test/test_comp_metrics.rb
60
+ test/test_contig.rb
61
+ test/test_contig_metrics.rb
62
+ test/test_inline.rb
63
+ test/test_optimiser.rb
64
+ test/test_read_metrics.rb
65
+ test/test_salmon.rb
66
+ test/test_snap.rb
67
+ test/test_transrate.rb
68
+ test/test_transrater.rb
69
+ test/vagrant/centos_6.5_64/Vagrantfile
70
+ test/vagrant/debian_7.4_64/Vagrantfile
71
+ test/vagrant/debian_7.4_64/provision.sh
72
+ test/vagrant/fedora_20_64/Vagrantfile
73
+ test/vagrant/fedora_20_64/provision.sh
74
+ test/vagrant/linux-x86_64/Vagrantfile
75
+ test/vagrant/osx/Vagrantfile
76
+ test/vagrant/ubuntu_12.04_64/Vagrantfile
77
+ test/vagrant/ubuntu_12.04_64/provision.sh
78
+ transrate.gemspec