transrate 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ coverage
6
+ InstalledFiles
7
+ lib/bundler/man
8
+ pkg
9
+ rdoc
10
+ spec/reports
11
+ test/tmp
12
+ test/version_tmp
13
+ tmp
14
+
15
+ # YARD artifacts
16
+ .yardoc
17
+ _yardoc
18
+ doc/
data/Gemfile ADDED
@@ -0,0 +1,3 @@
1
+ source "https://rubygems.org"
2
+
3
+ gemspec
@@ -0,0 +1,52 @@
1
+ PATH
2
+ remote: .
3
+ specs:
4
+ transrate (0.0.1)
5
+ bettersam
6
+ bio
7
+ rake (~> 10.1.0)
8
+ trollop (~> 2.0)
9
+ which
10
+
11
+ GEM
12
+ remote: https://rubygems.org/
13
+ specs:
14
+ ansi (1.4.3)
15
+ bettersam (0.0.1.alpha)
16
+ bio (1.4.3)
17
+ colorize (0.5.8)
18
+ coveralls (0.6.7)
19
+ colorize
20
+ multi_json (~> 1.3)
21
+ rest-client
22
+ simplecov (>= 0.7)
23
+ thor
24
+ facade (1.0.5)
25
+ mime-types (1.23)
26
+ multi_json (1.7.7)
27
+ pathname2 (1.6.5)
28
+ facade
29
+ rake (10.1.0)
30
+ rest-client (1.6.7)
31
+ mime-types (>= 1.16)
32
+ shoulda-context (1.1.5)
33
+ simplecov (0.7.1)
34
+ multi_json (~> 1.0)
35
+ simplecov-html (~> 0.7.1)
36
+ simplecov-html (0.7.1)
37
+ thor (0.16.0)
38
+ trollop (2.0)
39
+ turn (0.9.6)
40
+ ansi
41
+ which (0.0.2)
42
+ pathname2 (>= 1.4.4)
43
+
44
+ PLATFORMS
45
+ ruby
46
+
47
+ DEPENDENCIES
48
+ coveralls (~> 0.6.7)
49
+ shoulda-context
50
+ simplecov
51
+ transrate!
52
+ turn
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ The MIT License (MIT)
2
+
3
+ Copyright (c) 2013 Richard Smith
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy of
6
+ this software and associated documentation files (the "Software"), to deal in
7
+ the Software without restriction, including without limitation the rights to
8
+ use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
+ the Software, and to permit persons to whom the Software is furnished to do so,
10
+ subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
+ FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
+ COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
+ IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
+ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,40 @@
1
+ Transrate
2
+ ----
3
+
4
+ Quality analysis and comparison of transcriptome assemblies.
5
+
6
+
7
+ ## Installation
8
+
9
+ You can install transrate very easily. Just run at the terminal:
10
+
11
+ `gem install transrate`
12
+
13
+ If that doesn't work, check the requirements below...
14
+
15
+ ## Usage
16
+
17
+
18
+ ## Requirements
19
+
20
+ ### Ruby
21
+
22
+ First, you'll need Ruby v1.9.3 or greater installed. You can check with:
23
+
24
+ `ruby --version`
25
+
26
+ If you don't have Ruby installed, or you need a higher version, I recommend using [RVM](http://rvm.io/) as your Ruby Version Manager. To install RVM along with the latest ruby, just run:
27
+
28
+ `\curl -L https://get.rvm.io | bash -s stable`
29
+
30
+ ### Rubygems
31
+
32
+ Your Ruby installation *should* come with RubyGems, the package manager for Ruby. You can check with:
33
+
34
+ `gem --version`
35
+
36
+ If you don't have it installed, I recommend installing the latest version of Ruby and RubyGems using the RVM instructions above (in the Requirements: Ruby section).
37
+
38
+ ## Development status
39
+
40
+ This software is in very early development. Nevertheless, we welcome bug reports.
@@ -0,0 +1,95 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'trollop'
4
+ require 'transrate'
5
+
6
# Parse the command line with Trollop. The returned +opts+ object exposes one
# reader per option declared below (opts.assembly, opts.reference, opts.left,
# opts.right, opts.insertsize, opts.insertsd, opts.threads).
opts = Trollop::options do
  version "v0.0.1a"
  # Text shown ahead of the generated option list in the help output.
  banner <<-EOS

Transrate v0.0.1a by Richard Smith <rds45@cam.ac.uk>

DESCRIPTION:
Analyse a de-novo transcriptome
assembly using three kinds of metrics:

1. contig-based
2. read-mapping
3. reference-based

Please make sure USEARCH and bowtie2 are both installed
and in the PATH.

Bug reports and feature requests at:
http://github.com/blahah/transrate

USAGE:
transrate <options>

OPTIONS:

  EOS
  # The assembly and the reference are mandatory.
  opt :assembly, "assembly file in FASTA format", :required => true, :type => String
  opt :reference, "reference proteome file in FASTA format", :required => true, :type => String
  # The two reads files are optional.
  opt :left, "left reads file in FASTQ format", :type => String
  opt :right, "right reads file in FASTQ format", :type => String
  opt :insertsize, "mean insert size", :default => 200, :type => Integer
  opt :insertsd, "insert size standard deviation", :default => 50, :type => Integer
  # NOTE(review): opts.threads is parsed here but nothing later in this script
  # reads it -- confirm whether it should be forwarded to the aligners.
  opt :threads, "number of threads to use", :default => 8, :type => Integer
end
40
+
41
# Render +hash+ as a two-column text table, one "key   value" row per entry,
# rows joined with newlines.
#
# - +hash+  - metric name (String or Symbol) => value
# - +width+ - target width of each row; keys are left-aligned and values are
#             right-aligned to this width
#
# Float values are shown with two decimals so that ratio metrics are not
# truncated to 0 (non-finite floats are shown as "NaN"/"Infinity" instead of
# raising). Every other value is shown as an integer via #to_i, so nil is
# shown as 0. Rows that do not fit in +width+ keep a single separating space
# rather than raising on a negative pad.
def pretty_print_hash hash, width
  hash.map do |key, value|
    label = key.to_s
    shown =
      if value.is_a?(Float)
        value.finite? ? format('%.2f', value) : value.to_s
      else
        value.to_i.to_s
      end
    pad = [width - label.length - shown.length, 1].max
    "#{label}#{' ' * pad}#{shown}"
  end.join("\n")
end
44
+
45
# Main driver: load the assembly and the reference, compute the three groups
# of metrics and print one report section per group.
include Transrate

a = Assembly.new opts.assembly
r = Assembly.new opts.reference

puts "\n\nAnalysing assembly: #{opts.assembly}\n\n"

# 1. contig-based metrics
puts "calculating contig stats..."
t0 = Time.now
contig_results = a.basic_stats
puts "...done in #{Time.now - t0} seconds"

# 2. read-mapping metrics (only when both reads files were supplied)
read_results = nil
if opts.left && opts.right
  puts "\ncalculating read diagnostics..."
  t0 = Time.now
  read_metrics = ReadMetrics.new a
  # forward the insert size options; previously they were parsed but the
  # defaults of ReadMetrics#run were always used
  read_results = read_metrics.run(opts.left, opts.right,
                                  opts.insertsize, opts.insertsd)
  puts "...done in #{Time.now - t0} seconds"
elsif opts.left || opts.right
  puts "\nonly one reads file provided (both --left and --right are needed), skipping read diagnostics"
else
  puts "\nno reads provided, skipping read diagnostics"
end

# 3. reference-based metrics
puts "\ncalculating comparative metrics..."
t0 = Time.now
comparative_metrics = ComparativeMetrics.new(a, r)
comparative_results = comparative_metrics.run
puts "...done in #{Time.now - t0} seconds"

report_width = 30

# Print each section that produced results; a nil result means the
# corresponding analysis was skipped.
[
  ["Contig metrics:", contig_results],
  ["Read mapping metrics:", read_results],
  ["Comparative metrics:", comparative_results]
].each do |title, results|
  next unless results
  puts "\n\n"
  puts title
  puts "-" * report_width
  puts pretty_print_hash(results, report_width)
end
@@ -0,0 +1,8 @@
1
+ require 'transrate/version'
2
+ require 'transrate/assembly'
3
+ require 'transrate/bowtie2'
4
+ require 'transrate/read_metrics'
5
+ require 'transrate/usearch'
6
+ require 'transrate/rb_hit'
7
+ require 'transrate/reciprocal_annotation'
8
+ require 'transrate/comparative_metrics'
@@ -0,0 +1,94 @@
1
+ require 'bio'
2
+ require 'bettersam'
3
+ require 'csv'
4
+ require 'forwardable'
5
+
6
# An assembly: the sequences read from one FASTA file plus the bookkeeping
# (file name, database paths, base count) used by the metric classes.
# Iteration (#each and the rest of Enumerable) and #<< are delegated to the
# underlying array of sequences.
class Assembly

  include Enumerable
  extend Forwardable
  def_delegators :@assembly, :each, :<<

  # Percentages x for which #basic_stats reports an "Nx" entry, in the order
  # the entries appear in the result.
  NX_THRESHOLDS = [90, 70, 50, 30, 10].freeze

  attr_accessor :ublast_db
  attr_accessor :orfs_ublast_db
  attr_accessor :protein

  # number of bases in the assembly
  attr_writer :n_bases

  # assembly filename
  attr_accessor :file

  # Return a new Assembly holding every entry of the FASTA-format +file+,
  # sorted by ascending length.
  #
  # - +file+ - path of the FASTA file to load
  def initialize file
    @file = file
    @assembly = []
    @n_bases = 0
    Bio::FastaFormat.open(file).each do |entry|
      @n_bases += entry.length
      @assembly << entry
    end
    @assembly.sort_by! { |x| x.length }
  end

  # Load the FASTA-format +file+ and return its #basic_stats hash
  # (not the Assembly itself).
  def self.stats_from_fasta file
    a = Assembly.new file
    a.basic_stats
  end

  # Return a hash of statistics about this assembly with the keys
  # "n_seqs", "smallest", "largest", "n_bases", "mean_len", "n > 1k",
  # "n > 10k" and one "Nx" key per entry of NX_THRESHOLDS.
  #
  # Nx is the length of the first sequence, walking from shortest to longest,
  # at which the cumulative number of bases reaches (100 - x)% of #n_bases.
  # For an empty assembly the counts are 0, the mean is 0.0 and every Nx is
  # nil.
  def basic_stats
    # sort locally so sequences appended through #<< are handled too
    lengths = @assembly.map { |s| s.length }.sort
    total = self.n_bases
    # largest x first, i.e. smallest cumulative cutoff first
    pending = NX_THRESHOLDS.sort.reverse
    nx = {}
    cumulative = 0
    n1k = 0
    n10k = 0
    lengths.each do |len|
      cumulative += len
      n1k += 1 if len > 1_000
      n10k += 1 if len > 10_000
      # one sequence may cross several cutoffs, so keep consuming them;
      # the scan itself never stops early so the counters see every sequence
      while !pending.empty? && cumulative * 100 >= (100 - pending.first) * total
        nx[pending.shift] = len
      end
    end
    mean = lengths.empty? ? 0.0 : cumulative.to_f / lengths.size
    ns = Hash[NX_THRESHOLDS.map { |x| ["N#{x}", nx[x]] }]
    {
      "n_seqs" => lengths.size,
      "smallest" => lengths.first || 0,
      "largest" => lengths.last || 0,
      "n_bases" => total,
      "mean_len" => mean,
      "n > 1k" => n1k,
      "n > 10k" => n10k
    }.merge ns
  end

  # return the number of bases in the assembly, calculating
  # from the assembly if it hasn't already been done.
  def n_bases
    unless @n_bases
      @n_bases = 0
      @assembly.each { |s| @n_bases += s.length }
    end
    @n_bases
  end

  # Return #basic_stats formatted as text, one "key   value" row per
  # statistic, padded to 20 columns and with values shown as integers.
  def print_stats
    self.basic_stats.map do |k, v|
      shown = v.to_i.to_s
      # never ask for a negative number of spaces
      pad = [20 - (k.length + shown.length), 1].max
      "#{k}#{" " * pad}#{shown}"
    end.join("\n")
  end

end # Assembly
@@ -0,0 +1,44 @@
1
module Transrate

  # Thin wrapper around the bowtie2 and bowtie2-build command-line tools.
  class Bowtie2

    require 'which'
    require 'shellwords'
    include Which

    # Locate the bowtie2 and bowtie2-build executables.
    #
    # Raises RuntimeError when either of them cannot be found in the PATH.
    def initialize
      bowtie2_path = which('bowtie2')
      raise "could not find bowtie2 in the path" if bowtie2_path.empty?
      @bowtie2 = bowtie2_path.first
      bowtie2_build_path = which('bowtie2-build')
      raise "could not find bowtie2-build in the path" if bowtie2_build_path.empty?
      @bowtie2_build = bowtie2_build_path.first
    end

    # Align reads against the index built from +file+ and return the name of
    # the SAM file holding the alignments. Nothing is run when that file
    # already exists.
    #
    # - +file+       - assembly FASTA file; its basename names the index
    # - +left+       - reads file (first mates when +right+ is given)
    # - +right+      - second mates, or nil for unpaired reads
    # - +insertsize+ - mean insert size
    # - +insertsd+   - insert size standard deviation
    # - +outputname+ - SAM file to write; defaults to a name derived from
    #                  the reads files
    def map_reads file, left, right=nil, insertsize=200, insertsd=50, outputname=nil
      lbase = File.basename(left)
      # right may legitimately be nil (unpaired reads)
      outputname ||= right ? "#{lbase}.#{File.basename(right)}.sam" : "#{lbase}.sam"
      realistic_dist = insertsize + (3 * insertsd)
      unless File.exist? outputname
        # construct bowtie command; every path is shell-escaped so names
        # containing spaces or shell metacharacters are passed through intact
        bowtiecmd = "#{Shellwords.escape(@bowtie2)} -k 3 -p 8 -X #{realistic_dist}"
        bowtiecmd += " --no-unal --local --quiet"
        bowtiecmd += " -x #{Shellwords.escape(File.basename(file))}"
        if right
          # paired end
          bowtiecmd += " -1 #{Shellwords.escape(left)} -2 #{Shellwords.escape(right)}"
        else
          # unpaired reads are passed with -U
          bowtiecmd += " -U #{Shellwords.escape(left)}"
        end
        bowtiecmd += " > #{Shellwords.escape(outputname)}"
        # run bowtie
        `#{bowtiecmd}`
      end
      outputname
    end

    # Build the bowtie2 index for +file+ unless it is already present.
    # The index is named after the basename of +file+, which is also where
    # #map_reads looks for it, so that is the location checked here.
    def build_index file
      index = File.basename(file)
      unless File.exist?(index + '.1.bt2')
        `#{Shellwords.escape(@bowtie2_build)} --offrate 1 #{Shellwords.escape(file)} #{Shellwords.escape(index)}`
      end
    end

  end # Bowtie2

end # Transrate
@@ -0,0 +1,46 @@
1
+ require 'set'
2
+
3
module Transrate

  # Reference-based metrics: compares an assembly with a reference through
  # reciprocal best hits.
  class ComparativeMetrics

    # - +assembly+  - the Assembly being assessed
    # - +reference+ - the Assembly holding the reference sequences
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Run the reciprocal annotation and return a hash with the keys
    # :reciprocal_hits, :ortholog_hit_ratio and :collapse_factor.
    def run
      rbu = self.reciprocal_best_ublast
      ohr = self.ortholog_hit_ratio rbu
      cf = self.collapse_factor @ra.l2r_hits
      {
        :reciprocal_hits => rbu.size,
        :ortholog_hit_ratio => ohr,
        :collapse_factor => cf
      }
    end

    # Run a ReciprocalAnnotation of the assembly against the reference and
    # return its result. The annotation object is kept in @ra so #run can
    # read its one-way hits afterwards.
    def reciprocal_best_ublast
      @ra = ReciprocalAnnotation.new @assembly, @reference
      @ra.run
    end

    # Return the mean +tcov+ over the reciprocal hits in +rbu+
    # (query => hit), or 0.0 when there are none (instead of NaN).
    def ortholog_hit_ratio rbu
      return 0.0 if rbu.empty?
      total = rbu.reduce(0.0) { |sum, pair| sum + pair.last.tcov.to_f }
      total / rbu.size
    end

    # Return the mean number of distinct queries hitting each target in
    # +hits+ (query => hit), or 0.0 when there are no hits.
    #
    # Hits are grouped by target: +hits+ holds a single hit per query, so
    # grouping by query would always give exactly 1.0.
    def collapse_factor hits
      targets = Hash.new { |h, target| h[target] = Set.new }
      hits.each_pair do |query, hit|
        targets[hit.target] << query
      end
      return 0.0 if targets.empty?
      targets.values.reduce(0.0) { |sum, queries| sum + queries.size } / targets.size
    end

  end # ComparativeMetrics

end # Transrate
@@ -0,0 +1,33 @@
1
module Transrate

  # One alignment record, built from the columns of one tab-separated
  # results line.
  class RBHit

    # Fields: query id, subject id, % identity, alignment length, mismatches,
    # gap opens, q. start, q. end, s. start, s. end, evalue, bit score,
    # target coverage
    attr_accessor :query, :target, :id, :alnlen, :mismatches
    attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
    attr_accessor :bitscore, :tcov

    # - +list+ - the columns of one record, in the field order listed above
    #
    # Numeric columns are converted to numbers so that hits compare
    # numerically: as strings, "99.5" sorts above "100.2". A column missing
    # from +list+ is left as nil.
    def initialize(list)
      @query = extract_identifier(list[0])
      @target = extract_identifier(list[1])
      @id = to_float(list[2])
      @alnlen = to_integer(list[3])
      @mismatches = to_integer(list[4])
      @gaps = to_integer(list[5])
      @qstart = to_integer(list[6])
      @qend = to_integer(list[7])
      @tstart = to_integer(list[8])
      @tend = to_integer(list[9])
      @evalue = to_float(list[10])
      @bitscore = to_float(list[11])
      @tcov = to_float(list[12])
    end

    def to_s
      "#{@query} => #{@target}"
    end

    private

    # extract only the identifier: the text before the first "|", cut at the
    # first whitespace
    def extract_identifier(field)
      field.scan(/[^|]+/).first.split.first
    end

    # nil-preserving Float conversion
    def to_float(field)
      field.nil? ? nil : field.to_f
    end

    # nil-preserving Integer conversion
    def to_integer(field)
      field.nil? ? nil : field.to_i
    end

  end # RBHit

end # Transrate
@@ -0,0 +1,158 @@
1
module Transrate

  # Read-mapping metrics: maps read pairs to the assembly with Bowtie2 and
  # classifies each pair of alignment records.
  class ReadMetrics

    # needed by #check_bridges; required here so this file does not depend
    # on another file having loaded CSV first
    require 'csv'

    # - +assembly+ - the Assembly the reads are mapped to
    def initialize assembly
      @assembly = assembly
      @mapper = Bowtie2.new
      self.initial_values
    end

    # Map the +left+/+right+ reads to the assembly, analyse the mappings and
    # return a hash of counts keyed by metric name.
    #
    # - +insertsize+ - mean insert size
    # - +insertsd+   - insert size standard deviation
    def run left, right, insertsize=200, insertsd=50
      # start from zero so repeated runs do not add to earlier counts
      self.initial_values
      @mapper.build_index @assembly.file
      samfile = @mapper.map_reads(@assembly.file,
                                  left, right,
                                  insertsize, insertsd)
      self.analyse_read_mappings(samfile, insertsize, insertsd)
      {
        :total_mappings => @total,
        :good_mappings => @good,
        :bad_mappings => @bad,
        :both_mapped => @both_mapped,
        :properly_paired => @properly_paired,
        :improper_paired => @improperly_paired,
        :proper_orientation => @proper_orientation,
        :improper_orientation => @improper_orientation,
        :same_contig => @same_contig,
        :realistic_overlap => @realistic_overlap,
        :unrealistic_overlap => @unrealistic_overlap,
        :realistic_fragment => @realistic_fragment,
        :unrealistic_fragment => @unrealistic_fragment,
        :potential_bridges => @supported_bridges
      }
    end

    # Read +samfile+ two alignment records at a time and classify each pair
    # with #check_read_pair. When +bridge+ is true the supported bridges are
    # written out afterwards by #check_bridges.
    #
    # Raises RuntimeError when +samfile+ is missing or empty.
    def analyse_read_mappings samfile, insertsize, insertsd, bridge=true
      # always start with an empty tally: #check_fragment_plausibility
      # records into it whether or not bridges are reported
      @bridges = {}
      realistic_dist = self.realistic_distance(insertsize, insertsd)
      unless File.exist?(samfile) && File.size(samfile) > 0
        raise "samfile #{samfile} not found"
      end
      ls = BetterSam.new
      rs = BetterSam.new
      # block form so the file is closed when done
      File.open(samfile) do |sam|
        first_mate = nil
        sam.each_line do |line|
          # SAM header lines start with "@"; skipping them keeps an odd
          # number of header lines from shifting the two-line pairing
          next if line.start_with?('@')
          if first_mate.nil?
            first_mate = line
            next
          end
          if ls.parse_line(first_mate) && rs.parse_line(line)
            self.check_read_pair(ls, rs, realistic_dist)
          end
          first_mate = nil
        end
      end
      self.check_bridges if bridge
    end

    # Reset every counter and the bridge tally.
    def initial_values
      @total = 0
      @good = 0
      @bad = 0
      @both_mapped = 0
      @properly_paired = 0
      @improperly_paired = 0
      @proper_orientation = 0
      @improper_orientation = 0
      @same_contig = 0
      @realistic_overlap = 0
      @unrealistic_overlap = 0
      @realistic_fragment = 0
      @unrealistic_fragment = 0
      @supported_bridges = 0
      @bridges = {}
    end

    # Largest distance treated as realistic: the mean insert size plus
    # three standard deviations.
    def realistic_distance insertsize, insertsd
      insertsize + (3 * insertsd)
    end

    # Classify one pair of alignment records (+ls+ and +rs+) and update the
    # counters. Pairs whose +ls+ record is not a primary alignment are
    # ignored.
    def check_read_pair ls, rs, realistic_dist
      return unless ls.primary_aln?
      @total += 1
      return unless ls.both_mapped?
      # reads are paired
      @both_mapped += 1
      if ls.read_properly_paired?
        # mapped in proper pair
        @properly_paired += 1
        self.check_orientation(ls, rs)
      else
        # not mapped in proper pair
        @improperly_paired += 1
        if ls.chrom == rs.chrom
          # both on same contig
          @same_contig += 1
          self.check_overlap_plausibility(ls, rs)
        else
          self.check_fragment_plausibility(ls, rs, realistic_dist)
        end
      end
    end

    # Count the pair as good when the mates are on opposite strands and as
    # bad otherwise.
    def check_orientation ls, rs
      if ls.pair_opposite_strands?
        # mates in proper orientation
        @proper_orientation += 1
        @good += 1
      else
        # mates in wrong orientation
        @improper_orientation += 1
        @bad += 1
      end
    end

    # Mates on the same contig: the overlap is realistic when the two start
    # positions are less than one +ls+ sequence length apart.
    def check_overlap_plausibility ls, rs
      if (ls.pos - rs.pos).abs < ls.seq.length
        # overlap is realistic
        @realistic_overlap += 1
        self.check_orientation(ls, rs)
      else
        # overlap not realistic
        @unrealistic_overlap += 1
        @bad += 1
      end
    end

    # Mates on different contigs: when the combined distance measure is
    # within +realistic_dist+ the pair is counted as evidence for a bridge
    # between the two contigs.
    def check_fragment_plausibility ls, rs, realistic_dist
      # NOTE(review): seq is presumably the read sequence, so
      # seq.length - pos is not the distance to the end of the contig that
      # the original comment describes -- confirm against BetterSam before
      # relying on this metric.
      ldist = [ls.pos, ls.seq.length - ls.pos].min
      rdist = [rs.pos, rs.seq.length - rs.pos].min
      if ldist + rdist <= realistic_dist
        # increase the evidence for this bridge; the sorted pair of contig
        # names is the key, so both mate orders share one entry
        key = [ls.chrom, rs.chrom].sort
        @bridges[key] = @bridges.fetch(key, 0) + 1
        @realistic_fragment += 1
        @good += 1
      else
        @unrealistic_fragment += 1
        @bad += 1
      end
    end

    # Write every bridge seen more than once to supported_bridges.csv
    # (contig, contig, count) and store how many there were in
    # @supported_bridges.
    def check_bridges
      @supported_bridges = 0
      CSV.open('supported_bridges.csv', 'w') do |f|
        @bridges.each_pair do |(start, finish), count|
          if count > 1
            f << [start, finish, count]
            @supported_bridges += 1
          end
        end
      end
    end

  end # ReadMetrics

end # Transrate
@@ -0,0 +1,91 @@
1
module Transrate

  # Reciprocal best-hit annotation between an assembly ("left") and a
  # reference ("right") using usearch.
  class ReciprocalAnnotation

    # best hit per query, assembly against reference
    attr_reader :l2r_hits
    # best hit per query, reference against assembly
    attr_reader :r2l_hits
    # reciprocal best hits: assembly query => its best hit
    attr_reader :results

    # - +assembly+  - the Assembly being annotated
    # - +reference+ - the Assembly holding the reference sequences
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Build both databases, align in both directions and return the hash of
    # reciprocal best hits.
    def run
      self.make_assembly_db
      self.make_reference_db
      left2right, right2left = self.reciprocal_align
      self.parse_results left2right, right2left
      @results
    end

    # Find ORFs in the assembly and build a ublast database from them,
    # unless the assembly already has one.
    def make_assembly_db
      unless @assembly.orfs_ublast_db
        assembly_base = File.basename(@assembly.file, ".*")
        assembly_orfs = assembly_base + ".orfs"
        @usearch.findorfs @assembly.file, assembly_orfs
        assembly_db = assembly_base + ".udb"
        @usearch.makeudb_ublast assembly_orfs, assembly_db
        @assembly.orfs_ublast_db = assembly_db
      end
    end

    # Build a ublast database from the reference, unless it already has one.
    def make_reference_db
      unless @reference.ublast_db
        reference_base = File.basename(@reference.file, ".*")
        reference_db = reference_base + ".udb"
        @usearch.makeudb_ublast @reference.file, reference_db
        @reference.ublast_db = reference_db
      end
    end

    # Align the assembly against the reference database and the reference
    # against the assembly ORF database; return both output file names.
    def reciprocal_align
      left2right = @usearch.ublast @assembly.file, @reference.ublast_db
      right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
      [left2right, right2left]
    end

    # Load both results files, reduce each to the best hit per query and
    # keep in @results the assembly hits whose target's own best hit points
    # back at the same query.
    def parse_results left2right, right2left
      l2r_results = self.load_results_file left2right
      r2l_results = self.load_results_file right2left
      @l2r_hits = self.results_to_hits l2r_results
      @r2l_hits = self.results_to_hits r2l_results
      @results = {}
      @l2r_hits.each_pair do |query, best|
        next if best.nil?
        tbest = @r2l_hits[best.target]
        next if tbest.nil?
        @results[query] = best if query == tbest.target
      end
    end

    # Return a hash mapping each query to its best hit in +results+: the
    # highest bitscore wins and ties go to the lower evalue.
    def results_to_hits results
      hits = {}
      results.each do |hit|
        best = hits[hit.query]
        hits[hit.query] = hit if best.nil? || better_hit?(hit, best)
      end
      hits
    end

    # Read the tab-separated results +file+ into an array of RBHit objects,
    # skipping blank lines.
    def load_results_file file
      results = []
      # File.foreach closes the file once the last line has been read
      File.foreach(file) do |line|
        fields = line.chomp.split("\t")
        next if fields.empty?
        results << RBHit.new(fields)
      end
      results
    end

    private

    # True when +candidate+ beats +incumbent+. Scores are coerced with to_f
    # because text fields would otherwise compare lexicographically
    # ("99.5" > "100.2").
    def better_hit? candidate, incumbent
      new_bits = candidate.bitscore.to_f
      old_bits = incumbent.bitscore.to_f
      return new_bits > old_bits unless new_bits == old_bits
      candidate.evalue.to_f < incumbent.evalue.to_f
    end

  end # ReciprocalAnnotation

end # Transrate
@@ -0,0 +1,55 @@
1
module Transrate

  # Thin wrapper around the usearch command-line tool.
  class Usearch

    require 'which'
    require 'shellwords'
    include Which

    # - +threads+ - value passed to usearch's -threads option by #ublast
    #
    # Raises RuntimeError when usearch cannot be found in the PATH.
    def initialize threads=8
      @threads = threads
      paths = which('usearch')
      if paths.empty?
        raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
      end
      @cmd = paths.first
    end

    # The -userfields argument listing the output columns, in the order
    # RBHit expects them.
    def custom_output_fields
      " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
    end

    # Run ublast with +query+ against the database +target+ and return the
    # name of the output file, "<query basename>_<target basename>.b6".
    def ublast query, target, evalue="1e-5"
      blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
      subcmd = " -ublast #{Shellwords.escape(query)}"
      subcmd += " -db #{Shellwords.escape(target)}"
      subcmd += " -evalue #{Shellwords.escape(evalue.to_s)}"
      subcmd += " -userout #{Shellwords.escape(blast6outfile)}"
      subcmd += self.custom_output_fields
      subcmd += " -strand both"
      subcmd += " -threads #{@threads}"
      self.run subcmd
      blast6outfile
    end

    # Build a ublast database at +output+ from the sequences in +filepath+.
    def makeudb_ublast filepath, output
      subcmd = " -makeudb_ublast #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      self.run subcmd
    end

    # Run usearch -findorfs on +filepath+ with -xlat and -orfstyle 7,
    # writing to +output+.
    def findorfs filepath, output
      subcmd = " -findorfs #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      subcmd += " -xlat"
      subcmd += " -orfstyle 7"
      self.run subcmd
    end

    # Run usearch with the already-assembled argument string +subcmd+
    # (which must start with a space) plus -quiet, and return what it
    # printed on standard output.
    def run subcmd
      subcmd += " -quiet"
      `#{Shellwords.escape(@cmd)}#{subcmd}`
    end

  end # Usearch

end # Transrate
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
module Transrate
  # Version of the transrate library, split into its components.
  module VERSION
    MAJOR = 0
    MINOR = 0
    PATCH = 1
    # optional fourth component; left out of STRING while nil
    BUILD = nil

    # The dotted version string, e.g. "0.0.1". Frozen so the shared constant
    # cannot be modified in place; callers that need their own copy can dup.
    STRING = [MAJOR, MINOR, PATCH, BUILD].compact.join('.').freeze
  end
end # Transrate
@@ -0,0 +1,28 @@
1
+ # -*- encoding: utf-8 -*-
2
+ $:.push File.expand_path("../lib", __FILE__)
3
+ require File.expand_path('../lib/transrate/version', __FILE__)
4
+
5
# Gem specification for transrate.
Gem::Specification.new do |gem|
  gem.name          = 'transrate'
  gem.authors       = [ "Richard Smith" ]
  gem.email         = "rds45@cam.ac.uk"
  # the project's own repository, matching the bug-report URL printed by
  # bin/transrate (this previously pointed at a different project)
  gem.homepage      = 'https://github.com/blahah/transrate'
  # the shipped LICENSE file is the MIT license
  gem.license       = 'MIT'
  gem.summary       = %q{ quality assessment of de-novo transcriptome assemblies }
  gem.description   = %q{ a library and command-line tool for quality assessment of de-novo transcriptome assemblies }
  gem.version       = Transrate::VERSION::STRING.dup

  # package every file tracked by git; executables are the files in bin/
  gem.files         = `git ls-files`.split("\n")
  gem.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = %w[ lib ]

  gem.add_dependency 'rake', '~> 10.1.0'
  gem.add_dependency 'trollop', '~> 2.0'
  gem.add_dependency 'which'
  gem.add_dependency 'bio'
  gem.add_dependency 'bettersam', '~> 0.0.1.alpha'

  gem.add_development_dependency 'turn'
  gem.add_development_dependency 'simplecov'
  gem.add_development_dependency 'shoulda-context'
  gem.add_development_dependency 'coveralls', '~> 0.6.7'
end
metadata ADDED
@@ -0,0 +1,206 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: transrate
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Richard Smith
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-09-17 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: rake
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ~>
20
+ - !ruby/object:Gem::Version
21
+ version: 10.1.0
22
+ type: :runtime
23
+ prerelease: false
24
+ version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ~>
28
+ - !ruby/object:Gem::Version
29
+ version: 10.1.0
30
+ - !ruby/object:Gem::Dependency
31
+ name: trollop
32
+ requirement: !ruby/object:Gem::Requirement
33
+ none: false
34
+ requirements:
35
+ - - ~>
36
+ - !ruby/object:Gem::Version
37
+ version: '2.0'
38
+ type: :runtime
39
+ prerelease: false
40
+ version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ~>
44
+ - !ruby/object:Gem::Version
45
+ version: '2.0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: which
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :runtime
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
60
+ - !ruby/object:Gem::Version
61
+ version: '0'
62
+ - !ruby/object:Gem::Dependency
63
+ name: bio
64
+ requirement: !ruby/object:Gem::Requirement
65
+ none: false
66
+ requirements:
67
+ - - ! '>='
68
+ - !ruby/object:Gem::Version
69
+ version: '0'
70
+ type: :runtime
71
+ prerelease: false
72
+ version_requirements: !ruby/object:Gem::Requirement
73
+ none: false
74
+ requirements:
75
+ - - ! '>='
76
+ - !ruby/object:Gem::Version
77
+ version: '0'
78
+ - !ruby/object:Gem::Dependency
79
+ name: bettersam
80
+ requirement: !ruby/object:Gem::Requirement
81
+ none: false
82
+ requirements:
83
+ - - ~>
84
+ - !ruby/object:Gem::Version
85
+ version: 0.0.1.alpha
86
+ type: :runtime
87
+ prerelease: false
88
+ version_requirements: !ruby/object:Gem::Requirement
89
+ none: false
90
+ requirements:
91
+ - - ~>
92
+ - !ruby/object:Gem::Version
93
+ version: 0.0.1.alpha
94
+ - !ruby/object:Gem::Dependency
95
+ name: turn
96
+ requirement: !ruby/object:Gem::Requirement
97
+ none: false
98
+ requirements:
99
+ - - ! '>='
100
+ - !ruby/object:Gem::Version
101
+ version: '0'
102
+ type: :development
103
+ prerelease: false
104
+ version_requirements: !ruby/object:Gem::Requirement
105
+ none: false
106
+ requirements:
107
+ - - ! '>='
108
+ - !ruby/object:Gem::Version
109
+ version: '0'
110
+ - !ruby/object:Gem::Dependency
111
+ name: simplecov
112
+ requirement: !ruby/object:Gem::Requirement
113
+ none: false
114
+ requirements:
115
+ - - ! '>='
116
+ - !ruby/object:Gem::Version
117
+ version: '0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ none: false
122
+ requirements:
123
+ - - ! '>='
124
+ - !ruby/object:Gem::Version
125
+ version: '0'
126
+ - !ruby/object:Gem::Dependency
127
+ name: shoulda-context
128
+ requirement: !ruby/object:Gem::Requirement
129
+ none: false
130
+ requirements:
131
+ - - ! '>='
132
+ - !ruby/object:Gem::Version
133
+ version: '0'
134
+ type: :development
135
+ prerelease: false
136
+ version_requirements: !ruby/object:Gem::Requirement
137
+ none: false
138
+ requirements:
139
+ - - ! '>='
140
+ - !ruby/object:Gem::Version
141
+ version: '0'
142
+ - !ruby/object:Gem::Dependency
143
+ name: coveralls
144
+ requirement: !ruby/object:Gem::Requirement
145
+ none: false
146
+ requirements:
147
+ - - ~>
148
+ - !ruby/object:Gem::Version
149
+ version: 0.6.7
150
+ type: :development
151
+ prerelease: false
152
+ version_requirements: !ruby/object:Gem::Requirement
153
+ none: false
154
+ requirements:
155
+ - - ~>
156
+ - !ruby/object:Gem::Version
157
+ version: 0.6.7
158
+ description: ! ' a library and command-line tool for quality assessment of de-novo
159
+ transcriptome assemblies '
160
+ email: rds45@cam.ac.uk
161
+ executables:
162
+ - transrate
163
+ extensions: []
164
+ extra_rdoc_files: []
165
+ files:
166
+ - .gitignore
167
+ - Gemfile
168
+ - Gemfile.lock
169
+ - LICENSE
170
+ - README.md
171
+ - bin/transrate
172
+ - lib/transrate.rb
173
+ - lib/transrate/assembly.rb
174
+ - lib/transrate/bowtie2.rb
175
+ - lib/transrate/comparative_metrics.rb
176
+ - lib/transrate/rb_hit.rb
177
+ - lib/transrate/read_metrics.rb
178
+ - lib/transrate/reciprocal_annotation.rb
179
+ - lib/transrate/usearch.rb
180
+ - lib/transrate/version.rb
181
+ - transrate.gemspec
182
+ homepage: https://github.com/blahah/assemblotron
183
+ licenses: []
184
+ post_install_message:
185
+ rdoc_options: []
186
+ require_paths:
187
+ - lib
188
+ required_ruby_version: !ruby/object:Gem::Requirement
189
+ none: false
190
+ requirements:
191
+ - - ! '>='
192
+ - !ruby/object:Gem::Version
193
+ version: '0'
194
+ required_rubygems_version: !ruby/object:Gem::Requirement
195
+ none: false
196
+ requirements:
197
+ - - ! '>='
198
+ - !ruby/object:Gem::Version
199
+ version: '0'
200
+ requirements: []
201
+ rubyforge_project:
202
+ rubygems_version: 1.8.24
203
+ signing_key:
204
+ specification_version: 3
205
+ summary: quality assessment of de-novo transcriptome assemblies
206
+ test_files: []