transrate 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ transrate (0.0.1)
5
+ bettersam
6
+ bio
7
+ rake (~> 10.1.0)
8
+ trollop (~> 2.0)
9
+ which
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ ansi (1.4.3)
15
+ bettersam (0.0.1.alpha)
16
+ bio (1.4.3)
17
+ colorize (0.5.8)
18
+ coveralls (0.6.7)
19
+ colorize
20
+ multi_json (~> 1.3)
21
+ rest-client
22
+ simplecov (>= 0.7)
23
+ thor
24
+ facade (1.0.5)
25
+ mime-types (1.23)
26
+ multi_json (1.7.7)
27
+ pathname2 (1.6.5)
28
+ facade
29
+ rake (10.1.0)
30
+ rest-client (1.6.7)
31
+ mime-types (>= 1.16)
32
+ shoulda-context (1.1.5)
33
+ simplecov (0.7.1)
34
+ multi_json (~> 1.0)
35
+ simplecov-html (~> 0.7.1)
36
+ simplecov-html (0.7.1)
37
+ thor (0.16.0)
38
+ trollop (2.0)
39
+ turn (0.9.6)
40
+ ansi
41
+ which (0.0.2)
42
+ pathname2 (>= 1.4.4)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ coveralls (~> 0.6.7)
49
+ shoulda-context
50
+ simplecov
51
+ transrate!
52
+ turn
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Richard Smith
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,40 @@
1
+ Transrate
2
+ ----
3
+
4
+ Quality analysis and comparison of transcriptome assemblies.
5
+
6
+
7
+ ## Installation
8
+
9
+ You can install transrate very easily. Just run at the terminal:
10
+
11
+ `gem install transrate`
12
+
13
+ If that doesn't work, check the requirements below...
14
+
15
+ ## Usage
16
+
17
+
18
+ ## Requirements
19
+
20
+ ### Ruby
21
+
22
+ First, you'll need Ruby v1.9.3 or greater installed. You can check with:
23
+
24
+ `ruby --version`
25
+
26
+ If you don't have Ruby installed, or you need a higher version, I recommend using [RVM](http://rvm.io/) as your Ruby Version Manager. To install RVM along with the latest ruby, just run:
27
+
28
+ `\curl -L https://get.rvm.io | bash -s stable`
29
+
30
+ ### Rubygems
31
+
32
+ Your Ruby installation *should* come with RubyGems, the package manager for Ruby. You can check with:
33
+
34
+ `gem --version`
35
+
36
+ If you don't have it installed, I recommend installing the latest version of Ruby and RubyGems using the RVM instructions above (in the Requirements: Ruby section).
37
+
38
+ ## Development status
39
+
40
+ This software is in very early development. Nevertheless, we welcome bug reports.
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'trollop'
4
+ require 'transrate'
5
+
6
# Text printed at the top of the --help output.
banner_text = <<-EOS

Transrate v0.0.1a by Richard Smith <rds45@cam.ac.uk>

DESCRIPTION:
Analyse a de-novo transcriptome
assembly using three kinds of metrics:

1. contig-based
2. read-mapping
3. reference-based

Please make sure USEARCH and bowtie2 are both installed
and in the PATH.

Bug reports and feature requests at:
http://github.com/blahah/transrate

USAGE:
transrate <options>

OPTIONS:

EOS

# Parse the command line. The assembly and the reference are mandatory;
# the read files are optional and the numeric options carry defaults.
opts = Trollop.options do
  version "v0.0.1a"
  banner banner_text
  opt :assembly, "assembly file in FASTA format", required: true, type: String
  opt :reference, "reference proteome file in FASTA format", required: true, type: String
  opt :left, "left reads file in FASTQ format", type: String
  opt :right, "right reads file in FASTQ format", type: String
  opt :insertsize, "mean insert size", default: 200, type: Integer
  opt :insertsd, "insert size standard deviation", default: 50, type: Integer
  opt :threads, "number of threads to use", default: 8, type: Integer
end
40
+
41
# Format a hash of metrics as a two-column text table, one "name value"
# row per line, with the value right-aligned to +width+ characters.
#
# - +hash+  - metric name (String or Symbol) => numeric value
# - +width+ - total width of each row in characters
#
# Integers, and floats with no fractional part, are printed as integers.
# Other finite floats keep two decimals, so ratios are no longer
# truncated to 0. NaN and infinite values are printed as-is instead of
# raising FloatDomainError. Non-numeric values go through +to_i+ as
# before (so nil prints as 0).
#
# Returns the rows joined with newlines.
def pretty_print_hash(hash, width)
  hash.map do |k, v|
    label = k.to_s
    value =
      if v.is_a?(Float) && !v.finite?
        v.to_s
      elsif v.is_a?(Float) && v != v.to_i
        format("%.2f", v)
      else
        v.to_i.to_s
      end
    # always keep one separating space, even when the row is wider than
    # +width+ (a negative repeat count would raise ArgumentError)
    padding = [width - (label.length + value.length), 1].max
    "#{label}#{" " * padding}#{value}"
  end.join("\n")
end
44
+
45
include Transrate

# Both the assembly and the reference are loaded with Assembly.
a = Assembly.new opts.assembly
r = Assembly.new opts.reference

puts "\n\nAnalysing assembly: #{opts.assembly}\n\n"

# 1. contig-based metrics
puts "calculating contig stats..."
t0 = Time.now
contig_results = a.basic_stats
puts "...done in #{Time.now - t0} seconds"

# 2. read-mapping metrics (only when both read files were given)
read_results = nil
if opts.left && opts.right
  puts "\ncalculating read diagnostics..."
  t0 = Time.now
  read_metrics = ReadMetrics.new a
  # forward the insert size options: they were parsed but never used, so
  # the mapper always ran with its built-in defaults
  read_results = read_metrics.run(opts.left, opts.right,
                                  opts.insertsize, opts.insertsd)
  puts "...done in #{Time.now - t0} seconds"
else
  puts "\nno reads provided, skipping read diagnostics"
end

# 3. reference-based metrics
puts "\ncalculating comparative metrics..."
t0 = Time.now
comparative_metrics = ComparativeMetrics.new(a, r)
comparative_results = comparative_metrics.run
puts "...done in #{Time.now - t0} seconds"

report_width = 30

# Print one titled table per set of results that was actually computed.
[
  ["Contig metrics:", contig_results],
  ["Read mapping metrics:", read_results],
  ["Comparative metrics:", comparative_results]
].each do |title, results|
  next unless results
  puts "\n\n"
  puts title
  puts "-" * report_width
  puts pretty_print_hash(results, report_width)
end
@@ -0,0 +1,8 @@
1
+ require 'transrate/version'
2
+ require 'transrate/assembly'
3
+ require 'transrate/bowtie2'
4
+ require 'transrate/read_metrics'
5
+ require 'transrate/usearch'
6
+ require 'transrate/rb_hit'
7
+ require 'transrate/reciprocal_annotation'
8
+ require 'transrate/comparative_metrics'
@@ -0,0 +1,94 @@
1
+ require 'bio'
2
+ require 'bettersam'
3
+ require 'csv'
4
+ require 'forwardable'
5
+
6
# A set of sequences loaded from a FASTA file, together with summary
# statistics about their lengths. Iteration (+each+) and appending (+<<+)
# are delegated to the underlying array of sequences.
class Assembly

  include Enumerable
  extend Forwardable
  def_delegators :@assembly, :each, :<<

  attr_accessor :ublast_db
  attr_accessor :orfs_ublast_db
  attr_accessor :protein

  # number of bases in the assembly
  attr_writer :n_bases

  # assembly filename
  attr_accessor :file

  # Return a new Assembly.
  #
  # - +:file+ - path to a FASTA file; every entry is loaded, the total
  #   number of bases is recorded, and the entries are kept sorted by
  #   ascending length
  def initialize file
    @file = file
    @assembly = []
    @n_bases = 0
    Bio::FastaFormat.open(file).each do |entry|
      @n_bases += entry.length
      @assembly << entry
    end
    @assembly.sort_by! { |x| x.length }
  end

  # Return the hash of basic statistics for the sequences
  # in the FASTA-format +:file+
  def self.stats_from_fasta file
    a = Assembly.new file
    a.basic_stats
  end

  # Return a hash of statistics about this assembly:
  # "n_seqs", "smallest", "largest", "n_bases", "mean_len",
  # "n > 1k", "n > 10k" and "N90", "N70", "N50", "N30", "N10".
  #
  # Nx is the length of the sequence at which the cumulative length,
  # walking from the shortest sequence upwards, first reaches (100 - x)%
  # of the total number of bases. An Nx that is never reached is nil.
  # An empty assembly reports 0 for the sizes and the mean.
  def basic_stats
    # we'll calculate Nx for all these x
    x = [90, 70, 50, 30, 10]
    # cumulative cutoffs (in percent) in the order they will be reached
    pending = x.reverse
    res = []
    n1k = 0
    n10k = 0
    cumulative_length = 0.0
    total = self.n_bases
    # sort here too, so sequences appended with << are handled correctly
    lengths = @assembly.map { |s| s.length }.sort
    lengths.each do |len|
      cumulative_length += len
      n1k += 1 if len > 1_000
      n10k += 1 if len > 10_000
      prop = cumulative_length / total
      # one long sequence can cross several cutoffs at once, and the walk
      # must continue past the last cutoff so that the counts and the mean
      # cover every sequence
      while !pending.empty? && prop >= pending.first / 100.0
        res << len
        pending.shift
      end
    end
    mean = lengths.empty? ? 0.0 : cumulative_length / lengths.size
    ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
    {
      "n_seqs" => lengths.size,
      "smallest" => lengths.first || 0,
      "largest" => lengths.last || 0,
      "n_bases" => total,
      "mean_len" => mean,
      "n > 1k" => n1k,
      "n > 10k" => n10k
    }.merge ns
  end

  # return the number of bases in the assembly, calculating
  # from the assembly if it hasn't already been done.
  def n_bases
    unless @n_bases
      @n_bases = 0
      @assembly.each { |s| @n_bases += s.length }
    end
    @n_bases
  end

  # Return the basic statistics as text, one "name value" row per line,
  # with values truncated to integers and right-aligned to 20 characters.
  def print_stats
    self.basic_stats.map do |k, v|
      value = v.to_i.to_s
      # never pass a negative repeat count to String#*
      padding = [20 - (k.length + value.length), 0].max
      "#{k}#{" " * padding}#{value}"
    end.join("\n")
  end

end # Assembly
@@ -0,0 +1,44 @@
1
module Transrate

  # Thin wrapper around the bowtie2 and bowtie2-build executables,
  # which are located on the PATH when the object is created.
  class Bowtie2

    require 'which'
    require 'shellwords'
    include Which

    # Locate bowtie2 and bowtie2-build.
    # Raises RuntimeError if either of them is not found in the path.
    def initialize
      bowtie2_path = which('bowtie2')
      raise "could not find bowtie2 in the path" if bowtie2_path.empty?
      @bowtie2 = bowtie2_path.first
      bowtie2_build_path = which('bowtie2-build')
      raise "could not find bowtie2-build in the path" if bowtie2_build_path.empty?
      @bowtie2_build = bowtie2_build_path.first
    end

    # Map reads to the index built from +file+ and return the name of the
    # SAM file. Nothing is run if the output file already exists.
    #
    # - +file+       - FASTA file the index was built from (see build_index)
    # - +left+       - left (or unpaired) reads file
    # - +right+      - right reads file, or nil for unpaired reads
    # - +insertsize+ - mean insert size
    # - +insertsd+   - insert size standard deviation
    # - +outputname+ - SAM file to write; derived from the read file
    #   names when nil
    # - +threads+    - number of alignment threads
    def map_reads file, left, right=nil, insertsize=200, insertsd=50, outputname=nil, threads=8
      lbase = File.basename(left)
      # right is optional: only take its basename when it was given
      rbase = right ? File.basename(right) : nil
      outputname ||= [lbase, rbase, "sam"].compact.join(".")
      realistic_dist = insertsize + (3 * insertsd)
      unless File.exist? outputname
        # construct bowtie command
        bowtiecmd = "#{Shellwords.escape(@bowtie2)} -k 3 -p #{Integer(threads)}"
        bowtiecmd += " -X #{Integer(realistic_dist)}"
        bowtiecmd += " --no-unal --local --quiet"
        bowtiecmd += " -x #{Shellwords.escape(File.basename(file))}"
        if right
          # paired end
          bowtiecmd += " -1 #{Shellwords.escape(left)}"
          bowtiecmd += " -2 #{Shellwords.escape(right)}"
        else
          # unpaired reads must be passed with -U, not -1
          bowtiecmd += " -U #{Shellwords.escape(left)}"
        end
        bowtiecmd += " > #{Shellwords.escape(outputname)}"
        # run bowtie
        `#{bowtiecmd}`
      end
      outputname
    end

    # Build a bowtie2 index for +file+ in the current directory, named
    # after the basename of +file+, unless it is already there.
    def build_index file
      index = File.basename(file)
      # look for the index where it is written (the current directory),
      # not next to the input file
      unless File.exist?(index + '.1.bt2')
        `#{Shellwords.escape(@bowtie2_build)} --offrate 1 #{Shellwords.escape(file)} #{Shellwords.escape(index)}`
      end
    end

  end # Bowtie2

end # Transrate
@@ -0,0 +1,46 @@
1
+ require 'set'
2
+
3
module Transrate

  # Metrics that compare an assembly against a reference by means of
  # reciprocal best hits.
  class ComparativeMetrics

    # - +assembly+  - the assembly to evaluate
    # - +reference+ - the reference to compare it against
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Run the reciprocal annotation and return a hash with the number of
    # reciprocal hits, the ortholog hit ratio and the collapse factor.
    def run
      rbu = self.reciprocal_best_ublast
      ohr = self.ortholog_hit_ratio rbu
      cf = self.collapse_factor @ra.l2r_hits
      {
        :reciprocal_hits => rbu.size,
        :ortholog_hit_ratio => ohr,
        :collapse_factor => cf
      }
    end

    # Return the reciprocal best hits, as a hash of query => hit.
    def reciprocal_best_ublast
      @ra = ReciprocalAnnotation.new @assembly, @reference
      @ra.run
    end

    # Mean +tcov+ over the reciprocal best hits.
    #
    # - +rbu+ - hash of query => hit
    #
    # Returns 0.0 when there are no hits (instead of NaN).
    def ortholog_hit_ratio rbu
      return 0.0 if rbu.empty?
      total = rbu.reduce(0.0) { |sum, hit| sum + hit.last.tcov.to_f }
      total / rbu.size
    end

    # Mean number of distinct queries hitting the same target.
    #
    # - +hits+ - hash of query => best hit
    #
    # The hits are grouped by target: grouping by query, as was done
    # before, always gave sets of size one and so a factor of exactly 1.
    # Returns 0.0 when there are no hits (instead of NaN).
    def collapse_factor hits
      targets = {}
      hits.each_pair do |query, hit|
        next if hit.nil?
        (targets[hit.target] ||= Set.new) << query
      end
      return 0.0 if targets.empty?
      targets.values.reduce(0.0) { |sum, val| sum + val.size } / targets.size
    end

  end # ComparativeMetrics

end # Transrate
@@ -0,0 +1,33 @@
1
module Transrate

  # One row of tabular alignment output.
  class RBHit

    # Columns, in order: query id, subject id, % identity, alignment
    # length, mismatches, gap opens, q. start, q. end, s. start, s. end,
    # evalue, bit score, and a thirteenth column stored as +tcov+
    attr_accessor :query, :target, :id, :alnlen, :mismatches
    attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
    attr_accessor :bitscore, :tcov

    # Build a hit from +list+, the columns of one row. The first two
    # columns are reduced to a bare identifier; the rest are stored
    # exactly as given.
    def initialize(list)
      @query, @target = list[0], list[1]
      @query = @query.scan(/[^|]+/).first.split.first
      @target = @target.scan(/[^|]+/).first.split.first
      @id, @alnlen, @mismatches, @gaps,
        @qstart, @qend, @tstart, @tend,
        @evalue, @bitscore, @tcov = list[2..12]
    end

    # Render the hit as "query => target".
    def to_s
      @query + " => " + @target
    end

  end # RBHit

end # Transrate
@@ -0,0 +1,158 @@
1
module Transrate

  # Maps read pairs to an assembly and counts how many of the mappings
  # are consistent with it.
  class ReadMetrics

    # - +assembly+ - the assembly the reads will be mapped to
    def initialize assembly
      @assembly = assembly
      @mapper = Bowtie2.new
      self.initial_values
    end

    # Index the assembly, map the reads, analyse the mappings and return
    # a hash of the resulting counts.
    #
    # - +left+, +right+ - the paired read files
    # - +insertsize+    - mean insert size
    # - +insertsd+      - insert size standard deviation
    def run left, right, insertsize=200, insertsd=50
      @mapper.build_index @assembly.file
      samfile = @mapper.map_reads(@assembly.file,
                                  left, right,
                                  insertsize, insertsd)
      self.analyse_read_mappings(samfile, insertsize, insertsd)
      {
        :total_mappings => @total,
        :good_mappings => @good,
        :bad_mappings => @bad,
        :both_mapped => @both_mapped,
        :properly_paired => @properly_paired,
        :improper_paired => @improperly_paired,
        :proper_orientation => @proper_orientation,
        :improper_orientation => @improper_orientation,
        :same_contig => @same_contig,
        :realistic_overlap => @realistic_overlap,
        :unrealistic_overlap => @unrealistic_overlap,
        :realistic_fragment => @realistic_fragment,
        :unrealistic_fragment => @unrealistic_fragment,
        :potential_bridges => @supported_bridges
      }
    end

    # Read +samfile+ two alignment lines at a time (left mate, then right
    # mate) and update the counters for every pair.
    #
    # - +bridge+ - when true, the bridge evidence is reset before the
    #   analysis and summarised with check_bridges afterwards
    #
    # Raises RuntimeError if +samfile+ is missing or empty.
    def analyse_read_mappings samfile, insertsize, insertsd, bridge=true
      @bridges = {} if bridge
      realistic_dist = self.realistic_distance(insertsize, insertsd)
      if File.exist?(samfile) && File.size(samfile) > 0
        ls = BetterSam.new
        rs = BetterSam.new
        # the block form closes the file when done
        File.open(samfile) do |sam|
          left = nil
          sam.each_line do |line|
            # header lines must not take part in the pairing: an odd
            # number of them would shift every following pair by one line
            next if line.start_with?('@')
            if left.nil?
              left = line
            else
              if ls.parse_line(left) && rs.parse_line(line)
                self.check_read_pair(ls, rs, realistic_dist)
              end
              left = nil
            end
          end
        end
        self.check_bridges if bridge
      else
        raise "samfile #{samfile} not found"
      end
    end

    # Reset every counter, and the bridge evidence, to its starting value.
    def initial_values
      @total = 0
      @good = 0
      @bad = 0
      @both_mapped = 0
      @properly_paired = 0
      @improperly_paired = 0
      @proper_orientation = 0
      @improper_orientation = 0
      @same_contig = 0
      @realistic_overlap = 0
      @unrealistic_overlap = 0
      @realistic_fragment = 0
      @unrealistic_fragment = 0
      # set here as well, so they exist when the analysis is run with
      # bridge=false
      @bridges = {}
      @supported_bridges = 0
    end

    # Largest distance treated as realistic: the mean insert size plus
    # three standard deviations.
    def realistic_distance insertsize, insertsd
      insertsize + (3 * insertsd)
    end

    # Classify one read pair and update the counters. Pairs whose left
    # record is not a primary alignment are ignored.
    def check_read_pair ls, rs, realistic_dist
      return unless ls.primary_aln?
      @total += 1
      if ls.both_mapped?
        # reads are paired
        @both_mapped += 1
        if ls.read_properly_paired?
          # mapped in proper pair
          @properly_paired += 1
          self.check_orientation(ls, rs)
        else
          # not mapped in proper pair
          @improperly_paired += 1
          if ls.chrom == rs.chrom
            # both on same contig
            @same_contig += 1
            self.check_overlap_plausibility(ls, rs)
          else
            self.check_fragment_plausibility(ls, rs, realistic_dist)
          end
        end
      end
    end

    # Count the pair as good when the mates are on opposite strands,
    # bad otherwise.
    def check_orientation ls, rs
      if ls.pair_opposite_strands?
        # mates in proper orientation
        @proper_orientation += 1
        @good += 1
      else
        # mates in wrong orientation
        @improper_orientation += 1
        @bad += 1
      end
    end

    # For mates on the same contig: the overlap is realistic when their
    # positions are less than one read length apart.
    def check_overlap_plausibility ls, rs
      if (ls.pos - rs.pos).abs < ls.seq.length
        # overlap is realistic
        @realistic_overlap += 1
        self.check_orientation(ls, rs)
      else
        # overlap not realistic
        @unrealistic_overlap += 1
        @bad += 1
      end
    end

    # For mates on different contigs: are the mapping positions within a
    # realistic distance of the ends of the contigs? If so, record the
    # pair as evidence for a bridge between the two contigs.
    def check_fragment_plausibility ls, rs, realistic_dist
      # NOTE(review): seq.length looks like the length of the read, not
      # of the contig, so the distance to the far end of the contig is
      # probably not what is computed here - confirm against BetterSam
      ldist = [ls.pos, ls.seq.length - ls.pos].min
      rdist = [rs.pos, rs.seq.length - rs.pos].min
      if ldist + rdist <= realistic_dist
        # increase the evidence for this bridge
        key = [ls.chrom, rs.chrom].sort.join("<>").to_sym
        @bridges[key] = @bridges.fetch(key, 0) + 1
        @realistic_fragment += 1
        @good += 1
      else
        @unrealistic_fragment += 1
        @bad += 1
      end
    end

    # Write every bridge supported by more than one read pair to
    # supported_bridges.csv (in the current directory) and count them.
    def check_bridges
      @supported_bridges = 0
      CSV.open('supported_bridges.csv', 'w') do |f|
        @bridges.each_pair do |b, count|
          start, finish = b.to_s.split('<>')
          if count > 1
            f << [start, finish, count]
            @supported_bridges += 1
          end
        end
      end
    end

  end # ReadMetrics

end # Transrate
@@ -0,0 +1,91 @@
1
module Transrate

  # Finds reciprocal best hits between an assembly and a reference by
  # aligning each against a database built from the other.
  class ReciprocalAnnotation

    attr_reader :l2r_hits
    attr_reader :r2l_hits
    attr_reader :results

    # - +assembly+  - the assembly (the "left" side)
    # - +reference+ - the reference (the "right" side)
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Build both databases, align in both directions and return the
    # reciprocal best hits as a hash of query => hit.
    def run
      self.make_assembly_db
      self.make_reference_db
      left2right, right2left = self.reciprocal_align
      self.parse_results left2right, right2left
      @results
    end

    # Find the ORFs of the assembly and build a database from them,
    # unless the assembly already has one.
    def make_assembly_db
      unless @assembly.orfs_ublast_db
        assembly_base = File.basename(@assembly.file, ".*")
        assembly_orfs = assembly_base + ".orfs"
        @usearch.findorfs @assembly.file, assembly_orfs
        assembly_db = assembly_base + ".udb"
        @usearch.makeudb_ublast assembly_orfs, assembly_db
        @assembly.orfs_ublast_db = assembly_db
      end
    end

    # Build a database from the reference, unless it already has one.
    def make_reference_db
      unless @reference.ublast_db
        reference_base = File.basename(@reference.file, ".*")
        reference_db = reference_base + ".udb"
        @usearch.makeudb_ublast @reference.file, reference_db
        @reference.ublast_db = reference_db
      end
    end

    # Align the assembly against the reference and the reference against
    # the assembly ORFs. Returns the two result file names.
    def reciprocal_align
      left2right = @usearch.ublast @assembly.file, @reference.ublast_db
      right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
      [left2right, right2left]
    end

    # Load both result files, keep the best hit per query in each
    # direction, and store in @results the hits whose target has the
    # original query as its own best hit.
    def parse_results left2right, right2left
      l2r_results = self.load_results_file left2right
      r2l_results = self.load_results_file right2left
      @l2r_hits = self.results_to_hits l2r_results
      @r2l_hits = self.results_to_hits r2l_results
      @results = {}
      @l2r_hits.each_pair do |query, best|
        next if best.nil?
        tbest = @r2l_hits[best.target]
        next if tbest.nil?
        @results[query] = best if query == tbest.target
      end
    end

    # Reduce a list of hits to a hash of query => best hit. The best hit
    # has the highest bit score; ties go to the lowest e-value.
    def results_to_hits results
      hits = {}
      results.each do |hit|
        old_hit = hits[hit.query]
        if old_hit.nil?
          hits[hit.query] = hit
          next
        end
        # the scores are read from text, so compare them as numbers: as
        # strings, "99.0" sorts above "100.0"
        bits, old_bits = hit.bitscore.to_f, old_hit.bitscore.to_f
        if bits > old_bits
          hits[hit.query] = hit
        elsif bits == old_bits && hit.evalue.to_f < old_hit.evalue.to_f
          hits[hit.query] = hit
        end
      end
      hits
    end

    # Read a tab-separated results file into an array of RBHit, one per
    # non-empty line.
    def load_results_file file
      results = []
      # File.foreach closes the file when done
      File.foreach(file) do |line|
        line = line.chomp
        next if line.empty?
        results << RBHit.new(line.split("\t"))
      end
      results
    end

  end # ReciprocalAnnotation

end # Transrate
@@ -0,0 +1,55 @@
1
module Transrate

  # Thin wrapper around the usearch executable, which is located on the
  # PATH when the object is created.
  class Usearch

    require 'which'
    require 'shellwords'
    include Which

    # - +threads+ - number of threads passed to ublast
    #
    # Raises RuntimeError if usearch is not found in the path.
    def initialize threads=8
      @threads = threads
      paths = which('usearch')
      if paths.empty?
        raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
      end
      @cmd = paths.first
    end

    # The -userfields argument selecting the output columns.
    def custom_output_fields
      " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
    end

    # Align +query+ against the database +target+ and return the name of
    # the output file, which is written to the current directory.
    #
    # - +evalue+ - value passed to -evalue
    def ublast query, target, evalue="1e-5"
      blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
      # file names are escaped so that spaces and shell metacharacters
      # cannot break the command
      subcmd = " -ublast #{Shellwords.escape(query)}"
      subcmd += " -db #{Shellwords.escape(target)}"
      subcmd += " -evalue #{Shellwords.escape(evalue.to_s)}"
      subcmd += " -userout #{Shellwords.escape(blast6outfile)}"
      subcmd += self.custom_output_fields
      subcmd += " -strand both"
      subcmd += " -threads #{Shellwords.escape(@threads.to_s)}"
      self.run subcmd
      blast6outfile
    end

    # Build the database +output+ from the sequences in +filepath+.
    def makeudb_ublast filepath, output
      subcmd = " -makeudb_ublast #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      self.run subcmd
    end

    # Find the ORFs in +filepath+ and write their translations to +output+.
    def findorfs filepath, output
      subcmd = " -findorfs #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      subcmd += " -xlat"
      subcmd += " -orfstyle 7"
      self.run subcmd
    end

    # Run usearch quietly with the argument string +subcmd+, which is
    # passed to the shell as given, and return what it printed.
    def run subcmd
      subcmd += " -quiet"
      `#{Shellwords.escape(@cmd)}#{subcmd}`
    end

  end # Usearch

end # Transrate
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
module Transrate
  # Version of the gem, as separate components and as a dotted string.
  module VERSION
    MAJOR, MINOR, PATCH = 0, 0, 1
    # optional fourth component; left out of STRING while it is nil
    BUILD = nil

    STRING = [MAJOR, MINOR, PATCH, BUILD].reject { |part| part.nil? }.join('.')
  end
end # Transrate
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require File.expand_path('../lib/transrate/version', __FILE__)
4
+
5
# Gem specification for transrate. The file lists come from git, so the
# gem has to be built from a git checkout.
Gem::Specification.new do |gem|
  gem.name          = 'transrate'
  gem.authors       = [ "Richard Smith" ]
  gem.email         = "rds45@cam.ac.uk"
  # same repository as the one named in the command-line banner
  gem.homepage      = 'https://github.com/blahah/transrate'
  # matches the LICENSE file shipped with the gem
  gem.licenses      = [ 'MIT' ]
  gem.summary       = %q{ quality assessment of de-novo transcriptome assemblies }
  gem.description   = %q{ a library and command-line tool for quality assessment of de-novo transcriptome assemblies }
  gem.version       = Transrate::VERSION::STRING.dup

  gem.files         = `git ls-files`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = %w[ lib ]

  gem.add_dependency 'rake', '~> 10.1.0'
  gem.add_dependency 'trollop', '~> 2.0'
  gem.add_dependency 'which'
  gem.add_dependency 'bio'
  gem.add_dependency 'bettersam', '~> 0.0.1.alpha'

  gem.add_development_dependency 'turn'
  gem.add_development_dependency 'simplecov'
  gem.add_development_dependency 'shoulda-context'
  gem.add_development_dependency 'coveralls', '~> 0.6.7'
end
metadata ADDED
@@ -0,0 +1,206 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: transrate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Richard Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 10.1.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 10.1.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: trollop
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2.0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2.0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: which
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: bio
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bettersam
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 0.0.1.alpha
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 0.0.1.alpha
94
+ - !ruby/object:Gem::Dependency
95
+ name: turn
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: simplecov
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: shoulda-context
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ - !ruby/object:Gem::Dependency
143
+ name: coveralls
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.6.7
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.6.7
158
+ description: ! ' a library and command-line tool for quality assessment of de-novo
159
+ transcriptome assemblies '
160
+ email: rds45@cam.ac.uk
161
+ executables:
162
+ - transrate
163
+ extensions: []
164
+ extra_rdoc_files: []
165
+ files:
166
+ - .gitignore
167
+ - Gemfile
168
+ - Gemfile.lock
169
+ - LICENSE
170
+ - README.md
171
+ - bin/transrate
172
+ - lib/transrate.rb
173
+ - lib/transrate/assembly.rb
174
+ - lib/transrate/bowtie2.rb
175
+ - lib/transrate/comparative_metrics.rb
176
+ - lib/transrate/rb_hit.rb
177
+ - lib/transrate/read_metrics.rb
178
+ - lib/transrate/reciprocal_annotation.rb
179
+ - lib/transrate/usearch.rb
180
+ - lib/transrate/version.rb
181
+ - transrate.gemspec
182
+ homepage: https://github.com/blahah/assemblotron
183
+ licenses: []
184
+ post_install_message:
185
+ rdoc_options: []
186
+ require_paths:
187
+ - lib
188
+ required_ruby_version: !ruby/object:Gem::Requirement
189
+ none: false
190
+ requirements:
191
+ - - ! '>='
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ required_rubygems_version: !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ! '>='
198
+ - !ruby/object:Gem::Version
199
+ version: '0'
200
+ requirements: []
201
+ rubyforge_project:
202
+ rubygems_version: 1.8.24
203
+ signing_key:
204
+ specification_version: 3
205
+ summary: quality assessment of de-novo transcriptome assemblies
206
+ test_files: []