transrate 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +3 -0
- data/Gemfile.lock +52 -0
- data/LICENSE +20 -0
- data/README.md +40 -0
- data/bin/transrate +95 -0
- data/lib/transrate.rb +8 -0
- data/lib/transrate/assembly.rb +94 -0
- data/lib/transrate/bowtie2.rb +44 -0
- data/lib/transrate/comparative_metrics.rb +46 -0
- data/lib/transrate/rb_hit.rb +33 -0
- data/lib/transrate/read_metrics.rb +158 -0
- data/lib/transrate/reciprocal_annotation.rb +91 -0
- data/lib/transrate/usearch.rb +55 -0
- data/lib/transrate/version.rb +12 -0
- data/transrate.gemspec +28 -0
- metadata +206 -0
data/.gitignore
ADDED
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
transrate (0.0.1)
|
5
|
+
bettersam
|
6
|
+
bio
|
7
|
+
rake (~> 10.1.0)
|
8
|
+
trollop (~> 2.0)
|
9
|
+
which
|
10
|
+
|
11
|
+
GEM
|
12
|
+
remote: https://rubygems.org/
|
13
|
+
specs:
|
14
|
+
ansi (1.4.3)
|
15
|
+
bettersam (0.0.1.alpha)
|
16
|
+
bio (1.4.3)
|
17
|
+
colorize (0.5.8)
|
18
|
+
coveralls (0.6.7)
|
19
|
+
colorize
|
20
|
+
multi_json (~> 1.3)
|
21
|
+
rest-client
|
22
|
+
simplecov (>= 0.7)
|
23
|
+
thor
|
24
|
+
facade (1.0.5)
|
25
|
+
mime-types (1.23)
|
26
|
+
multi_json (1.7.7)
|
27
|
+
pathname2 (1.6.5)
|
28
|
+
facade
|
29
|
+
rake (10.1.0)
|
30
|
+
rest-client (1.6.7)
|
31
|
+
mime-types (>= 1.16)
|
32
|
+
shoulda-context (1.1.5)
|
33
|
+
simplecov (0.7.1)
|
34
|
+
multi_json (~> 1.0)
|
35
|
+
simplecov-html (~> 0.7.1)
|
36
|
+
simplecov-html (0.7.1)
|
37
|
+
thor (0.16.0)
|
38
|
+
trollop (2.0)
|
39
|
+
turn (0.9.6)
|
40
|
+
ansi
|
41
|
+
which (0.0.2)
|
42
|
+
pathname2 (>= 1.4.4)
|
43
|
+
|
44
|
+
PLATFORMS
|
45
|
+
ruby
|
46
|
+
|
47
|
+
DEPENDENCIES
|
48
|
+
coveralls (~> 0.6.7)
|
49
|
+
shoulda-context
|
50
|
+
simplecov
|
51
|
+
transrate!
|
52
|
+
turn
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2013 Richard Smith
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
6
|
+
this software and associated documentation files (the "Software"), to deal in
|
7
|
+
the Software without restriction, including without limitation the rights to
|
8
|
+
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
|
9
|
+
the Software, and to permit persons to whom the Software is furnished to do so,
|
10
|
+
subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
17
|
+
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
|
18
|
+
COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
|
19
|
+
IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
20
|
+
CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
Transrate
|
2
|
+
----
|
3
|
+
|
4
|
+
Quality analysis and comparison of transcriptome assemblies.
|
5
|
+
|
6
|
+
|
7
|
+
## Installation
|
8
|
+
|
9
|
+
You can install transrate very easily. Just run at the terminal:
|
10
|
+
|
11
|
+
`gem install transrate`
|
12
|
+
|
13
|
+
If that doesn't work, check the requirements below...
|
14
|
+
|
15
|
+
## Usage
|
16
|
+
|
17
|
+
|
18
|
+
## Requirements
|
19
|
+
|
20
|
+
### Ruby
|
21
|
+
|
22
|
+
First, you'll need Ruby v1.9.3 or greater installed. You can check with:
|
23
|
+
|
24
|
+
`ruby --version`
|
25
|
+
|
26
|
+
If you don't have Ruby installed, or you need a higher version, I recommend using [RVM](http://rvm.io/) as your Ruby Version Manager. To install RVM along with the latest ruby, just run:
|
27
|
+
|
28
|
+
`\curl -L https://get.rvm.io | bash -s stable`
|
29
|
+
|
30
|
+
### Rubygems
|
31
|
+
|
32
|
+
Your Ruby installation *should* come with RubyGems, the package manager for Ruby. You can check with:
|
33
|
+
|
34
|
+
`gem --version`
|
35
|
+
|
36
|
+
If you don't have it installed, I recommend installing the latest version of Ruby and RubyGems using the RVM instructions above (in the Requirements: Ruby section).
|
37
|
+
|
38
|
+
## Development status
|
39
|
+
|
40
|
+
This software is in very early development. Nevertheless, we welcome bug reports.
|
data/bin/transrate
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'trollop'
|
4
|
+
require 'transrate'
|
5
|
+
|
6
|
+
# Parse the command line. The banner is printed ahead of the generated
# option list by --help; the version string is printed by --version.
opts = Trollop::options do
  # Read the version from the library (the same constant the gemspec uses)
  # instead of repeating a literal, so the two cannot drift apart.
  version "v#{Transrate::VERSION::STRING}"
  banner <<-EOS

  Transrate v#{Transrate::VERSION::STRING} by Richard Smith <rds45@cam.ac.uk>

  DESCRIPTION:
  Analyse a de-novo transcriptome
  assembly using three kinds of metrics:

  1. contig-based
  2. read-mapping
  3. reference-based

  Please make sure USEARCH and bowtie2 are both installed
  and in the PATH.

  Bug reports and feature requests at:
  http://github.com/blahah/transrate

  USAGE:
  transrate <options>

  OPTIONS:

  EOS
  opt :assembly, "assembly file in FASTA format", :required => true, :type => String
  opt :reference, "reference proteome file in FASTA format", :required => true, :type => String
  opt :left, "left reads file in FASTQ format", :type => String
  opt :right, "right reads file in FASTQ format", :type => String
  opt :insertsize, "mean insert size", :default => 200, :type => Integer
  opt :insertsd, "insert size standard deviation", :default => 50, :type => Integer
  opt :threads, "number of threads to use", :default => 8, :type => Integer
end
|
40
|
+
|
41
|
+
# Format +hash+ as a report with one "key<padding>value" line per entry,
# padding each line with spaces so that it is +width+ characters long.
#
# - +hash+  - metric names (Strings or Symbols) mapped to numeric values
# - +width+ - the total width of each report line
#
# Whole numbers (and nil, which is shown as 0) are printed as integers.
# Fractional values are printed with two decimal places rather than being
# truncated, and non-finite floats are printed as-is instead of raising.
# When a key and value do not fit in +width+ they are still separated by a
# single space.
#
# Returns the lines joined with newlines.
def pretty_print_hash hash, width
  hash.map do |k, v|
    key = k.to_s
    value =
      if !v.is_a?(Float) || (v.finite? && v == v.floor)
        v.to_i.to_s
      elsif v.finite?
        format('%.2f', v)
      else
        v.to_s
      end
    # never ask for negative padding: String#* raises on a negative count
    padding = [width - (key.length + value.length), 1].max
    "#{key}#{' ' * padding}#{value}"
  end.join("\n")
end
|
44
|
+
|
45
|
+
include Transrate

# Load the assembly under test and the reference it is compared against.
a = Assembly.new opts.assembly
r = Assembly.new opts.reference

puts "\n\nAnalysing assembly: #{opts.assembly}\n\n"

# 1. contig-based metrics
puts "calculating contig stats..."
t0 = Time.now
contig_results = a.basic_stats
puts "...done in #{Time.now - t0} seconds"

# 2. read-mapping metrics, only when both read files were given
read_results = nil
if opts.left && opts.right
  puts "\ncalculating read diagnostics..."
  t0 = Time.now
  read_metrics = ReadMetrics.new a
  # pass the insert size options through; without them the values given
  # on the command line were parsed but never used
  read_results = read_metrics.run(opts.left, opts.right,
                                  opts.insertsize, opts.insertsd)
  puts "...done in #{Time.now - t0} seconds"
else
  puts "\nno reads provided, skipping read diagnostics"
end

# 3. reference-based metrics
puts "\ncalculating comparative metrics..."
t0 = Time.now
comparative_metrics = ComparativeMetrics.new(a, r)
comparative_results = comparative_metrics.run
puts "...done in #{Time.now - t0} seconds"

report_width = 30

# Print one section per set of results, skipping any that were not computed.
[
  ["Contig metrics:", contig_results],
  ["Read mapping metrics:", read_results],
  ["Comparative metrics:", comparative_results]
].each do |title, results|
  next unless results
  puts "\n\n"
  puts title
  puts "-" * report_width
  puts pretty_print_hash(results, report_width)
end
|
data/lib/transrate.rb
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'transrate/version'
|
2
|
+
require 'transrate/assembly'
|
3
|
+
require 'transrate/bowtie2'
|
4
|
+
require 'transrate/read_metrics'
|
5
|
+
require 'transrate/usearch'
|
6
|
+
require 'transrate/rb_hit'
|
7
|
+
require 'transrate/reciprocal_annotation'
|
8
|
+
require 'transrate/comparative_metrics'
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'bio'
|
2
|
+
require 'bettersam'
|
3
|
+
require 'csv'
|
4
|
+
require 'forwardable'
|
5
|
+
|
6
|
+
# A collection of sequences loaded from a FASTA file, held sorted from
# shortest to longest, with summary statistics about their lengths.
class Assembly

  include Enumerable
  extend Forwardable
  # NOTE: sequences appended with << are neither re-sorted nor added to
  # the cached base count.
  def_delegators :@assembly, :each, :<<

  attr_accessor :ublast_db
  attr_accessor :orfs_ublast_db
  attr_accessor :protein

  # number of bases in the assembly
  attr_writer :n_bases

  # assembly filename
  attr_accessor :file

  # Return a new Assembly.
  #
  # - +:file+ - path to a FASTA file; every entry is loaded into memory
  def initialize file
    @file = file
    @assembly = []
    @n_bases = 0
    Bio::FastaFormat.open(file).each do |entry|
      @n_bases += entry.length
      @assembly << entry
    end
    # basic_stats relies on the sequences being in ascending length order
    @assembly.sort_by! { |x| x.length }
  end

  # Return the hash of basic statistics (see basic_stats) for the
  # sequences in the FASTA-format +:file+
  def self.stats_from_fasta file
    a = Assembly.new file
    a.basic_stats
  end

  # Return a hash of statistics about this assembly:
  # "n_seqs", "smallest", "largest", "n_bases", "mean_len", "n > 1k",
  # "n > 10k" and "N90", "N70", "N50", "N30", "N10".
  #
  # For an empty assembly the counts, lengths and mean are 0 and the Nx
  # values are nil.
  def basic_stats
    # we'll calculate Nx for all these x
    x = [90, 70, 50, 30, 10]
    # Cutoffs are taken from the end of this copy, so the 10% cutoff comes
    # first. Walking the sequences shortest-first, the sequence at which
    # the running total reaches 10% of the bases is N90, and so on.
    pending = x.clone
    cutoff = pending.pop / 100.0
    res = []
    n1k = 0
    n10k = 0
    cumulative_length = 0.0
    total = self.n_bases
    @assembly.each do |s|
      cumulative_length += s.length
      n1k += 1 if s.length > 1_000
      n10k += 1 if s.length > 10_000
      prop = cumulative_length / total
      # A single sequence can cross several cutoffs at once. The walk must
      # also carry on after the last cutoff, so that the longest sequences
      # are still included in the counts and in the mean.
      while cutoff && prop >= cutoff
        res << s.length
        cutoff = pending.empty? ? nil : pending.pop / 100.0
      end
    end
    empty = @assembly.empty?
    mean = empty ? 0.0 : cumulative_length / @assembly.size
    ns = Hash[x.map { |n| "N#{n}" }.zip(res)]
    {
      "n_seqs" => @assembly.size,
      "smallest" => empty ? 0 : @assembly.first.length,
      "largest" => empty ? 0 : @assembly.last.length,
      "n_bases" => total,
      "mean_len" => mean,
      "n > 1k" => n1k,
      "n > 10k" => n10k
    }.merge ns
  end

  # return the number of bases in the assembly, calculating
  # from the assembly if it hasn't already been done.
  def n_bases
    unless @n_bases
      @n_bases = 0
      @assembly.each { |s| @n_bases += s.length }
    end
    @n_bases
  end

  # Return the basic statistics as text, one "name<padding>value" line per
  # statistic, padded to 20 characters. Values are shown as integers.
  def print_stats
    self.basic_stats.map do |k, v|
      value = v.to_i.to_s
      # keep at least one space; a negative count would make String#* raise
      padding = [20 - (k.length + value.length), 1].max
      "#{k}#{" " * padding}#{value}"
    end.join("\n")
  end

end # Assembly
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Transrate

  # Wrapper around the bowtie2 and bowtie2-build command-line programs.
  class Bowtie2

    require 'which'
    require 'shellwords'
    include Which

    # Locate bowtie2 and bowtie2-build in the PATH.
    # Raises a RuntimeError if either cannot be found.
    def initialize
      bowtie2_path = which('bowtie2')
      raise "could not find bowtie2 in the path" if bowtie2_path.empty?
      @bowtie2 = bowtie2_path.first
      bowtie2_build_path = which('bowtie2-build')
      raise "could not find bowtie2-build in the path" if bowtie2_build_path.empty?
      @bowtie2_build = bowtie2_build_path.first
    end

    # Map reads to the index built from +file+ and write a SAM file.
    #
    # - +file+       - the FASTA file the index was built from (see build_index)
    # - +left+       - left reads file, or the only reads file if unpaired
    # - +right+      - right reads file, or nil for unpaired reads
    # - +insertsize+ - mean insert size
    # - +insertsd+   - insert size standard deviation
    # - +outputname+ - SAM file to write; derived from the read file names
    #                  when nil
    # - +threads+    - number of alignment threads
    #
    # Nothing is run if the output file already exists.
    # Returns the name of the SAM file.
    def map_reads file, left, right=nil, insertsize=200, insertsd=50, outputname=nil, threads=8
      lbase = File.basename(left)
      # only look at +right+ when it was given: File.basename(nil) raises
      outputname ||= right ? "#{lbase}.#{File.basename(right)}.sam" : "#{lbase}.sam"
      realistic_dist = insertsize + (3 * insertsd)
      unless File.exist? outputname
        # construct bowtie command
        bowtiecmd = [@bowtie2, '-k', '3', '-p', threads, '-X', realistic_dist,
                     '--no-unal', '--local', '--quiet',
                     '-x', File.basename(file)]
        # paired end?
        bowtiecmd += right ? ['-1', left, '-2', right] : ['-U', left]
        # run bowtie; every argument is shell-escaped so that paths with
        # spaces or shell metacharacters are passed through intact
        `#{Shellwords.join(bowtiecmd)} > #{Shellwords.escape(outputname)}`
      end
      outputname
    end

    # Build a bowtie2 index for +file+ in the current directory, named
    # after the file's basename, unless that index already exists.
    def build_index file
      index_name = File.basename(file)
      # look for the index where it is written (and where map_reads expects
      # it), not next to the input file
      unless File.exist?(index_name + '.1.bt2')
        `#{Shellwords.join([@bowtie2_build, '--offrate', '1', file, index_name])}`
      end
    end

  end # Bowtie2

end # Transrate
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'set'
|
2
|
+
|
3
|
+
module Transrate

  # Metrics that compare an assembly against a reference set of sequences.
  class ComparativeMetrics

    # - +assembly+  - the Assembly being assessed
    # - +reference+ - the Assembly holding the reference sequences
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Run the reciprocal annotation and return a hash with the keys
    # :reciprocal_hits, :ortholog_hit_ratio and :collapse_factor.
    def run
      rbu = self.reciprocal_best_ublast
      ohr = self.ortholog_hit_ratio rbu
      # @ra is set by reciprocal_best_ublast above
      cf = self.collapse_factor @ra.l2r_hits
      {
        :reciprocal_hits => rbu.size,
        :ortholog_hit_ratio => ohr,
        :collapse_factor => cf
      }
    end

    # Run a ReciprocalAnnotation of the assembly against the reference,
    # keeping it in @ra, and return its results.
    def reciprocal_best_ublast
      @ra = ReciprocalAnnotation.new @assembly, @reference
      @ra.run
    end

    # Return the mean target coverage (+tcov+) over the hits in +rbu+,
    # a hash of query => hit. Returns 0.0 when there are no hits, rather
    # than the NaN produced by dividing by zero.
    def ortholog_hit_ratio rbu
      return 0.0 if rbu.empty?
      # iterating a hash yields [query, hit] pairs, hence +last+
      rbu.reduce(0.0) { |sum, pair| sum + pair.last.tcov.to_f } / rbu.size
    end

    # Return the mean number of distinct targets per query in +hits+,
    # a hash of query => hit. Returns 0.0 when there are no hits.
    #
    # NOTE(review): a Hash holds each query once, so every set built here
    # has exactly one member and the result is 1.0 for any non-empty
    # +hits+. This probably needs all the hits for each query rather than
    # only the best one - confirm the intended input.
    def collapse_factor hits
      targets = Hash.new { |hash, query| hash[query] = Set.new }
      hits.each_pair { |query, hit| targets[query] << hit.target }
      return 0.0 if targets.empty?
      targets.values.reduce(0.0) { |sum, val| sum + val.size } / targets.size
    end

  end # ComparativeMetrics

end # Transrate
|
@@ -0,0 +1,33 @@
|
|
1
|
+
module Transrate

  # One alignment hit, built from the fields of a tab-separated line of
  # results.
  class RBHit

    # Fields: query id, subject id, % identity, alignment length, mismatches,
    # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
    # followed by the target coverage. All but the two ids are kept exactly
    # as given (as strings, when read from a results file).
    attr_accessor :query, :target, :id, :alnlen, :mismatches
    attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
    attr_accessor :bitscore, :tcov

    # - +list+ - the fields of one hit, in the order listed above
    #
    # Raises an ArgumentError if the query or target id is missing or blank.
    def initialize(list)
      @query = extract_id(list[0], 'query')
      @target = extract_id(list[1], 'target')
      @id = list[2]
      @alnlen = list[3]
      @mismatches = list[4]
      @gaps = list[5]
      @qstart = list[6]
      @qend = list[7]
      @tstart = list[8]
      @tend = list[9]
      @evalue = list[10]
      @bitscore = list[11]
      @tcov = list[12]
    end

    def to_s
      "#{@query} => #{@target}"
    end

    private

    # Extract only the identifier from +field+: the text before the first
    # '|', cut at the first whitespace. +name+ is used in the error message.
    #
    # NOTE(review): for ids that start with a database prefix, such as
    # "sp|P12345|NAME", this keeps only the prefix - confirm that such ids
    # are not expected here.
    def extract_id(field, name)
      id = field.to_s[/[^|]+/].to_s.split.first
      raise ArgumentError, "hit has no #{name} identifier" if id.nil?
      id
    end

  end # RBHit

end # Transrate
|
@@ -0,0 +1,158 @@
|
|
1
|
+
module Transrate

  # Metrics derived from mapping paired reads back to an assembly.
  class ReadMetrics

    # - +assembly+ - the Assembly the reads will be mapped to
    def initialize assembly
      @assembly = assembly
      @mapper = Bowtie2.new
      self.initial_values
    end

    # Index the assembly, map the +left+ and +right+ read files to it and
    # analyse the mappings. Returns a hash of the resulting counts.
    def run left, right, insertsize=200, insertsd=50
      @mapper.build_index @assembly.file
      samfile = @mapper.map_reads(@assembly.file,
                                  left, right,
                                  insertsize, insertsd)
      self.analyse_read_mappings(samfile, insertsize, insertsd)
      {
        :total_mappings => @total,
        :good_mappings => @good,
        :bad_mappings => @bad,
        :both_mapped => @both_mapped,
        :properly_paired => @properly_paired,
        :improper_paired => @improperly_paired,
        :proper_orientation => @proper_orientation,
        :improper_orientation => @improper_orientation,
        :same_contig => @same_contig,
        :realistic_overlap => @realistic_overlap,
        :unrealistic_overlap => @unrealistic_overlap,
        :realistic_fragment => @realistic_fragment,
        :unrealistic_fragment => @unrealistic_fragment,
        :potential_bridges => @supported_bridges
      }
    end

    # Read +samfile+ two alignment lines at a time, treating each two
    # lines as the left and right read of a pair, and update the counts.
    #
    # - +bridge+ - when true, start a fresh bridge tally and write the
    #              supported bridges out afterwards (see check_bridges)
    #
    # Raises a RuntimeError if the file is missing or empty.
    def analyse_read_mappings samfile, insertsize, insertsd, bridge=true
      @bridges = {} if bridge
      realistic_dist = self.realistic_distance(insertsize, insertsd)
      unless File.exist?(samfile) && File.size(samfile) > 0
        raise "samfile #{samfile} not found"
      end
      ls = BetterSam.new
      rs = BetterSam.new
      # the block form closes the file when the analysis is finished
      File.open(samfile) do |sam|
        left = nil
        sam.each_line do |line|
          # Skip SAM header lines before pairing. An odd number of them
          # would otherwise shift every pair by one line.
          next if line.start_with?('@')
          if left.nil?
            left = line
          else
            # presumably parse_line returns a false value for a line it
            # cannot parse - confirm against BetterSam
            if ls.parse_line(left) && rs.parse_line(line)
              self.check_read_pair(ls, rs, realistic_dist)
            end
            left = nil
          end
        end
      end
      self.check_bridges if bridge
    end

    # Reset every count, and the bridge tally, to its starting value.
    def initial_values
      @total = 0
      @good = 0
      @bad = 0
      @both_mapped = 0
      @properly_paired = 0
      @improperly_paired = 0
      @proper_orientation = 0
      @improper_orientation = 0
      @same_contig = 0
      @realistic_overlap = 0
      @unrealistic_overlap = 0
      @realistic_fragment = 0
      @unrealistic_fragment = 0
      # set here as well, so that analysing with bridge=false neither
      # fails on a missing tally nor reports nil bridges
      @bridges = {}
      @supported_bridges = 0
    end

    # The largest distance treated as realistic: the mean insert size
    # plus three standard deviations.
    def realistic_distance insertsize, insertsd
      insertsize + (3 * insertsd)
    end

    # Classify one read pair and update the counts. Pairs whose left read
    # is not a primary alignment are ignored.
    def check_read_pair ls, rs, realistic_dist
      return unless ls.primary_aln?
      @total += 1
      # nothing more is counted unless both reads of the pair mapped
      return unless ls.both_mapped?
      @both_mapped += 1
      if ls.read_properly_paired?
        # mapped in proper pair
        @properly_paired += 1
        self.check_orientation(ls, rs)
      else
        # not mapped in proper pair
        @improperly_paired += 1
        if ls.chrom == rs.chrom
          # both on same contig
          @same_contig += 1
          self.check_overlap_plausibility(ls, rs)
        else
          self.check_fragment_plausibility(ls, rs, realistic_dist)
        end
      end
    end

    # Count the pair as good if the mates are on opposite strands,
    # otherwise as bad.
    def check_orientation ls, rs
      if ls.pair_opposite_strands?
        # mates in proper orientation
        @proper_orientation += 1
        @good += 1
      else
        # mates in wrong orientation
        @improper_orientation += 1
        @bad += 1
      end
    end

    # For mates on the same contig: the overlap is realistic if the
    # mapping positions are less than one read length apart.
    def check_overlap_plausibility ls, rs
      if (ls.pos - rs.pos).abs < ls.seq.length
        # overlap is realistic
        @realistic_overlap += 1
        self.check_orientation(ls, rs)
      else
        # overlap not realistic
        @unrealistic_overlap += 1
        @bad += 1
      end
    end

    # For mates on different contigs: are the mapping positions within a
    # realistic distance of the ends of contigs? If so, the pair is counted
    # as evidence for a bridge between the two contigs.
    #
    # NOTE(review): the distance to the far end is measured against
    # seq.length. If seq is the read sequence rather than the contig, this
    # is not the distance to the end of the contig - confirm.
    def check_fragment_plausibility ls, rs, realistic_dist
      ldist = [ls.pos, ls.seq.length - ls.pos].min
      rdist = [rs.pos, rs.seq.length - rs.pos].min
      if ldist + rdist <= realistic_dist
        # increase the evidence for this bridge; the names are sorted so
        # that both directions share one key
        key = [ls.chrom, rs.chrom].sort.join("<>").to_sym
        @bridges[key] = @bridges.fetch(key, 0) + 1
        @realistic_fragment += 1
        @good += 1
      else
        @unrealistic_fragment += 1
        @bad += 1
      end
    end

    # Count the bridges supported by more than one read pair and write
    # them to supported_bridges.csv in the current directory.
    def check_bridges
      @supported_bridges = 0
      CSV.open('supported_bridges.csv', 'w') do |f|
        @bridges.each_pair do |b, count|
          next unless count > 1
          start, finish = b.to_s.split('<>')
          f << [start, finish, count]
          @supported_bridges += 1
        end
      end
    end

  end # ReadMetrics

end # Transrate
|
@@ -0,0 +1,91 @@
|
|
1
|
+
module Transrate

  # Finds reciprocal best hits between an assembly and a reference by
  # aligning each against the other with usearch.
  class ReciprocalAnnotation

    attr_reader :l2r_hits
    attr_reader :r2l_hits
    attr_reader :results

    # - +assembly+  - the Assembly to annotate
    # - +reference+ - the Assembly holding the reference sequences
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Build both databases, align in both directions and return the
    # reciprocal best hits as a hash of query => hit.
    def run
      self.make_assembly_db
      self.make_reference_db
      left2right, right2left = self.reciprocal_align
      self.parse_results left2right, right2left
      @results
    end

    # Find the ORFs of the assembly and build a ublast database from them,
    # unless the assembly already has one. Both files are named after the
    # assembly file and written to the current directory.
    def make_assembly_db
      unless @assembly.orfs_ublast_db
        assembly_base = File.basename(@assembly.file, ".*")
        assembly_orfs = assembly_base + ".orfs"
        @usearch.findorfs @assembly.file, assembly_orfs
        assembly_db = assembly_base + ".udb"
        @usearch.makeudb_ublast assembly_orfs, assembly_db
        @assembly.orfs_ublast_db = assembly_db
      end
    end

    # Build a ublast database from the reference, unless it already has one.
    def make_reference_db
      unless @reference.ublast_db
        reference_base = File.basename(@reference.file, ".*")
        reference_db = reference_base + ".udb"
        @usearch.makeudb_ublast @reference.file, reference_db
        @reference.ublast_db = reference_db
      end
    end

    # Align the assembly to the reference database and the reference to
    # the assembly ORF database. Returns the two results file names.
    def reciprocal_align
      left2right = @usearch.ublast @assembly.file, @reference.ublast_db
      right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
      [left2right, right2left]
    end

    # Load both results files, keep the best hit per query in each
    # direction (@l2r_hits, @r2l_hits) and store in @results those best
    # hits whose target's own best hit points back at the query.
    def parse_results left2right, right2left
      l2r_results = self.load_results_file left2right
      r2l_results = self.load_results_file right2left
      @l2r_hits = self.results_to_hits l2r_results
      @r2l_hits = self.results_to_hits r2l_results
      @results = {}
      @l2r_hits.each_pair do |query, best|
        next if best.nil?
        tbest = @r2l_hits[best.target]
        next if tbest.nil?
        @results[query] = best if query == tbest.target
      end
    end

    # Return a hash of query => best hit for the hits in +results+. The
    # best hit has the highest bit score; equal bit scores are separated by
    # the lowest e-value, and otherwise the hit seen first is kept.
    def results_to_hits results
      hits = {}
      results.each do |hit|
        current = hits[hit.query]
        hits[hit.query] = hit if current.nil? || better_hit?(hit, current)
      end
      hits
    end

    # Return an array with one RBHit per non-blank line of the
    # tab-separated results +file+.
    def load_results_file file
      results = []
      # foreach closes the file once every line has been read
      File.foreach(file) do |line|
        next if line.strip.empty?
        results << RBHit.new(line.chomp.split("\t"))
      end
      results
    end

    private

    # Is +hit+ better than +other+? The scores are converted to numbers
    # first: they are read from the results file as strings, and compared
    # as strings "99.5" would beat "100.2".
    def better_hit? hit, other
      bits = hit.bitscore.to_f
      other_bits = other.bitscore.to_f
      bits > other_bits ||
        (bits == other_bits && hit.evalue.to_f < other.evalue.to_f)
    end

  end # ReciprocalAnnotation

end # Transrate
|
@@ -0,0 +1,55 @@
|
|
1
|
+
module Transrate

  # Wrapper around the usearch command-line program.
  class Usearch

    require 'which'
    require 'shellwords'
    include Which

    # - +threads+ - number of threads passed to usearch by ublast
    #
    # Raises a RuntimeError if usearch is not in the PATH.
    def initialize threads=8
      @threads = threads
      paths = which('usearch')
      if paths.empty?
        raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
      end
      @cmd = paths.first
    end

    # The option selecting the columns written to the results file.
    def custom_output_fields
      " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
    end

    # Align the sequences in +query+ to the database +target+ with ublast.
    # The results are written to a file in the current directory named
    # after both inputs; returns that file's name.
    def ublast query, target, evalue="1e-5"
      blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
      subcmd = " -ublast #{Shellwords.escape(query)}"
      subcmd += " -db #{Shellwords.escape(target)}"
      subcmd += " -evalue #{Shellwords.escape(evalue)}"
      subcmd += " -userout #{Shellwords.escape(blast6outfile)}"
      subcmd += self.custom_output_fields
      subcmd += " -strand both"
      subcmd += " -threads #{Shellwords.escape(@threads)}"
      self.run subcmd
      blast6outfile
    end

    # Build the ublast database +output+ from the sequences in +filepath+.
    def makeudb_ublast filepath, output
      subcmd = " -makeudb_ublast #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      self.run subcmd
    end

    # Write the ORFs found in +filepath+ to +output+.
    def findorfs filepath, output
      subcmd = " -findorfs #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      subcmd += " -xlat"
      subcmd += " -orfstyle 7"
      self.run subcmd
    end

    # Run usearch quietly with the options in +subcmd+ and return what it
    # printed. +subcmd+ is passed to the shell as given, so the methods
    # above escape each path and value they put into it.
    def run subcmd
      subcmd += " -quiet"
      `#{Shellwords.escape(@cmd)}#{subcmd}`
    end

  end # Usearch

end # Transrate
|
data/transrate.gemspec
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
$:.push File.expand_path("../lib", __FILE__)
require File.expand_path('../lib/transrate/version', __FILE__)

Gem::Specification.new do |gem|
  gem.name          = 'transrate'
  gem.authors       = [ "Richard Smith" ]
  gem.email         = "rds45@cam.ac.uk"
  # the project's own repository, as given in the command-line banner
  gem.homepage      = 'https://github.com/blahah/transrate'
  # matches the LICENSE file shipped with the gem
  gem.licenses      = [ 'MIT' ]
  gem.summary       = %q{ quality assessment of de-novo transcriptome assemblies }
  gem.description   = %q{ a library and command-line tool for quality assessment of de-novo transcriptome assemblies }
  gem.version       = Transrate::VERSION::STRING.dup

  # package every file tracked by git; the executables are those in bin/
  gem.files = `git ls-files`.split("\n")
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  gem.require_paths = %w[ lib ]

  gem.add_dependency 'rake', '~> 10.1.0'
  gem.add_dependency 'trollop', '~> 2.0'
  gem.add_dependency 'which'
  gem.add_dependency 'bio'
  gem.add_dependency 'bettersam', '~> 0.0.1.alpha'

  gem.add_development_dependency 'turn'
  gem.add_development_dependency 'simplecov'
  gem.add_development_dependency 'shoulda-context'
  gem.add_development_dependency 'coveralls', '~> 0.6.7'
end
|
metadata
ADDED
@@ -0,0 +1,206 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: transrate
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.0.1
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Richard Smith
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-09-17 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: rake
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ~>
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 10.1.0
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ~>
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 10.1.0
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: trollop
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ~>
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '2.0'
|
38
|
+
type: :runtime
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ~>
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '2.0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: which
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :runtime
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: '0'
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: bio
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :runtime
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: bettersam
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ~>
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: 0.0.1.alpha
|
86
|
+
type: :runtime
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ~>
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 0.0.1.alpha
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: turn
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
- !ruby/object:Gem::Dependency
|
111
|
+
name: simplecov
|
112
|
+
requirement: !ruby/object:Gem::Requirement
|
113
|
+
none: false
|
114
|
+
requirements:
|
115
|
+
- - ! '>='
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :development
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
none: false
|
122
|
+
requirements:
|
123
|
+
- - ! '>='
|
124
|
+
- !ruby/object:Gem::Version
|
125
|
+
version: '0'
|
126
|
+
- !ruby/object:Gem::Dependency
|
127
|
+
name: shoulda-context
|
128
|
+
requirement: !ruby/object:Gem::Requirement
|
129
|
+
none: false
|
130
|
+
requirements:
|
131
|
+
- - ! '>='
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: '0'
|
134
|
+
type: :development
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
none: false
|
138
|
+
requirements:
|
139
|
+
- - ! '>='
|
140
|
+
- !ruby/object:Gem::Version
|
141
|
+
version: '0'
|
142
|
+
- !ruby/object:Gem::Dependency
|
143
|
+
name: coveralls
|
144
|
+
requirement: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ~>
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 0.6.7
|
150
|
+
type: :development
|
151
|
+
prerelease: false
|
152
|
+
version_requirements: !ruby/object:Gem::Requirement
|
153
|
+
none: false
|
154
|
+
requirements:
|
155
|
+
- - ~>
|
156
|
+
- !ruby/object:Gem::Version
|
157
|
+
version: 0.6.7
|
158
|
+
description: ! ' a library and command-line tool for quality assessment of de-novo
|
159
|
+
transcriptome assemblies '
|
160
|
+
email: rds45@cam.ac.uk
|
161
|
+
executables:
|
162
|
+
- transrate
|
163
|
+
extensions: []
|
164
|
+
extra_rdoc_files: []
|
165
|
+
files:
|
166
|
+
- .gitignore
|
167
|
+
- Gemfile
|
168
|
+
- Gemfile.lock
|
169
|
+
- LICENSE
|
170
|
+
- README.md
|
171
|
+
- bin/transrate
|
172
|
+
- lib/transrate.rb
|
173
|
+
- lib/transrate/assembly.rb
|
174
|
+
- lib/transrate/bowtie2.rb
|
175
|
+
- lib/transrate/comparative_metrics.rb
|
176
|
+
- lib/transrate/rb_hit.rb
|
177
|
+
- lib/transrate/read_metrics.rb
|
178
|
+
- lib/transrate/reciprocal_annotation.rb
|
179
|
+
- lib/transrate/usearch.rb
|
180
|
+
- lib/transrate/version.rb
|
181
|
+
- transrate.gemspec
|
182
|
+
homepage: https://github.com/blahah/assemblotron
|
183
|
+
licenses: []
|
184
|
+
post_install_message:
|
185
|
+
rdoc_options: []
|
186
|
+
require_paths:
|
187
|
+
- lib
|
188
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
189
|
+
none: false
|
190
|
+
requirements:
|
191
|
+
- - ! '>='
|
192
|
+
- !ruby/object:Gem::Version
|
193
|
+
version: '0'
|
194
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
195
|
+
none: false
|
196
|
+
requirements:
|
197
|
+
- - ! '>='
|
198
|
+
- !ruby/object:Gem::Version
|
199
|
+
version: '0'
|
200
|
+
requirements: []
|
201
|
+
rubyforge_project:
|
202
|
+
rubygems_version: 1.8.24
|
203
|
+
signing_key:
|
204
|
+
specification_version: 3
|
205
|
+
summary: quality assessment of de-novo transcriptome assemblies
|
206
|
+
test_files: []
|