transrate 0.1.0 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +16 -1
- data/.travis.yml +8 -0
- data/README.md +45 -43
- data/Rakefile +36 -0
- data/bin/transrate +98 -50
- data/deps/deps.yaml +55 -0
- data/lib/transrate.rb +19 -4
- data/lib/transrate/assembly.rb +93 -182
- data/lib/transrate/bowtie2.rb +37 -13
- data/lib/transrate/cmd.rb +19 -0
- data/lib/transrate/comparative_metrics.rb +239 -19
- data/lib/transrate/contig.rb +212 -0
- data/lib/transrate/contig_metrics.rb +76 -0
- data/lib/transrate/read_metrics.rb +83 -41
- data/lib/transrate/samtools.rb +73 -0
- data/lib/transrate/transrater.rb +31 -11
- data/lib/transrate/version.rb +1 -1
- data/test/data/150uncovered.l.fq +892 -0
- data/test/data/150uncovered.r.fq +892 -0
- data/test/data/Os.protein.2.fa +95 -0
- data/test/data/Os.protein.fa +199 -0
- data/test/data/assembly.2.fa +26 -0
- data/test/{assembly.fasta → data/assembly.fasta} +0 -0
- data/test/data/bridging_reads.l.fastq +20 -0
- data/test/data/bridging_reads.r.fastq +20 -0
- data/test/data/sorghum_transcript.fa +4 -0
- data/test/data/tiny.sam +4 -0
- data/test/helper.rb +33 -2
- data/test/test_bowtie.rb +54 -0
- data/test/test_cmd.rb +15 -0
- data/test/test_comp_metrics.rb +177 -0
- data/test/test_contig.rb +61 -0
- data/test/test_contig_metrics.rb +50 -0
- data/test/test_inline.rb +10 -9
- data/test/test_read_metrics.rb +68 -0
- data/test/test_samtools.rb +22 -0
- data/test/test_transrate.rb +40 -0
- data/test/test_transrater.rb +68 -0
- data/transrate.gemspec +16 -10
- metadata +232 -57
- data/lib/transrate/express.rb +0 -37
- data/lib/transrate/log.rb +0 -16
- data/lib/transrate/rb_hit.rb +0 -33
- data/lib/transrate/reciprocal_annotation.rb +0 -105
- data/lib/transrate/usearch.rb +0 -66
- data/test/test_test.rb +0 -41
data/lib/transrate/express.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
require 'which'
|
4
|
-
|
5
|
-
class Express
|
6
|
-
|
7
|
-
# return an Express object
|
8
|
-
def initialize
|
9
|
-
express_path = Which::which('express')
|
10
|
-
raise "could not find eXpress in the path" if express_path.empty?
|
11
|
-
@express = express_path.first
|
12
|
-
end
|
13
|
-
|
14
|
-
# return hash of expression for each sequenceID
|
15
|
-
# in the assembly fastafile
|
16
|
-
def quantify_expression assembly, samfile
|
17
|
-
assembly = assembly.file if assembly.is_a? Assembly
|
18
|
-
cmd = "#{@express} --no-bias-correct #{File.expand_path assembly} #{File.expand_path samfile}"
|
19
|
-
ex_output = 'results.xprs'
|
20
|
-
fin_output = "#{assembly}_#{ex_output}"
|
21
|
-
unless File.exists? fin_output
|
22
|
-
`#{cmd} 2>&1`.split(/\n/)[1..30].join("\n")
|
23
|
-
File.rename(ex_output, fin_output)
|
24
|
-
end
|
25
|
-
expression = {}
|
26
|
-
File.open(fin_output).each do |line|
|
27
|
-
line = line.chomp.split("\t")
|
28
|
-
target = line[1]
|
29
|
-
effective_count = line[7]
|
30
|
-
expression[target] = effective_count
|
31
|
-
end
|
32
|
-
expression
|
33
|
-
end
|
34
|
-
|
35
|
-
end # Express
|
36
|
-
|
37
|
-
end # Transrate
|
data/lib/transrate/log.rb
DELETED
data/lib/transrate/rb_hit.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
class RBHit
|
4
|
-
|
5
|
-
# Fields: query id, subject id, % identity, alignment length, mismatches,
|
6
|
-
# gap opens, q. start, q. end, s. start, s. end, evalue, bit score
|
7
|
-
attr_accessor :query, :target, :id, :alnlen, :mismatches
|
8
|
-
attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
|
9
|
-
attr_accessor :bitscore, :target_coverage
|
10
|
-
|
11
|
-
def initialize(list)
|
12
|
-
@query = list[0].scan(/[^|]+/).first.split.first # extract only identifier
|
13
|
-
@target = list[1].scan(/[^|]+/).first.split.first
|
14
|
-
@id = list[2]
|
15
|
-
@alnlen = list[3]
|
16
|
-
@mismatches = list[4]
|
17
|
-
@gaps = list[5]
|
18
|
-
@qstart = list[6]
|
19
|
-
@qend = list[7]
|
20
|
-
@tstart = list[8]
|
21
|
-
@tend = list[9]
|
22
|
-
@evalue = list[10]
|
23
|
-
@bitscore = list[11]
|
24
|
-
@target_coverage = list[12].to_i
|
25
|
-
end
|
26
|
-
|
27
|
-
def to_s
|
28
|
-
@query + " => " + @target
|
29
|
-
end
|
30
|
-
|
31
|
-
end # RBHit
|
32
|
-
|
33
|
-
end # Transrate
|
data/lib/transrate/reciprocal_annotation.rb
DELETED
@@ -1,105 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
class ReciprocalAnnotation
|
4
|
-
|
5
|
-
attr_reader :l2r_hits
|
6
|
-
attr_reader :r2l_hits
|
7
|
-
attr_reader :results
|
8
|
-
|
9
|
-
def initialize assembly, reference
|
10
|
-
@assembly = assembly
|
11
|
-
@reference = reference
|
12
|
-
@usearch = Usearch.new
|
13
|
-
end
|
14
|
-
|
15
|
-
def run
|
16
|
-
self.make_assembly_db
|
17
|
-
self.make_reference_db
|
18
|
-
left2right, right2left = self.reciprocal_align
|
19
|
-
self.parse_results left2right, right2left
|
20
|
-
@results
|
21
|
-
end
|
22
|
-
|
23
|
-
def make_assembly_db
|
24
|
-
unless @assembly.orfs_ublast_db
|
25
|
-
assembly_dir = File.dirname(@assembly.file)
|
26
|
-
assembly_base = File.basename(@assembly.file, ".*")
|
27
|
-
assembly_orfs = assembly_base + ".orfs"
|
28
|
-
@usearch.findorfs @assembly.file, assembly_orfs
|
29
|
-
assembly_db = File.join(assembly_dir, assembly_base + ".udb")
|
30
|
-
@usearch.makeudb_ublast assembly_orfs, assembly_db
|
31
|
-
@assembly.orfs_ublast_db = assembly_db
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
def make_reference_db
|
36
|
-
unless @reference.ublast_db
|
37
|
-
reference_dir = File.dirname(@reference.file)
|
38
|
-
reference_base = File.basename(@reference.file, ".*")
|
39
|
-
reference_db = File.join(reference_dir, reference_base + ".udb")
|
40
|
-
@usearch.makeudb_ublast @reference.file, reference_db
|
41
|
-
@reference.ublast_db = reference_db
|
42
|
-
return reference_db
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
def reciprocal_align
|
47
|
-
left2right = @usearch.ublast @assembly.file, @reference.ublast_db
|
48
|
-
right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
|
49
|
-
[left2right, right2left]
|
50
|
-
end
|
51
|
-
|
52
|
-
def parse_results left2right, right2left
|
53
|
-
l2r_results = self.load_results_file left2right
|
54
|
-
r2l_results = self.load_results_file right2left
|
55
|
-
@l2r_hits = self.results_to_hits l2r_results
|
56
|
-
@r2l_hits = self.results_to_hits r2l_results
|
57
|
-
@results = {}
|
58
|
-
@l2r_hits.each_pair do |query, best|
|
59
|
-
next if best.nil?
|
60
|
-
tbest = @r2l_hits[best.target]
|
61
|
-
next if tbest.nil?
|
62
|
-
if query == tbest.target
|
63
|
-
@results[query] = best
|
64
|
-
end
|
65
|
-
end
|
66
|
-
end
|
67
|
-
|
68
|
-
# what is this method trying to do? :/
|
69
|
-
def results_to_hits results
|
70
|
-
hits = {}
|
71
|
-
results.each do |hit|
|
72
|
-
if hits.has_key? hit.query
|
73
|
-
old_hit = hits[hit.query]
|
74
|
-
old_eval, old_bits = old_hit.evalue, old_hit.bitscore
|
75
|
-
if hit.bitscore == nil
|
76
|
-
p hit
|
77
|
-
abort "oh noes"
|
78
|
-
end
|
79
|
-
if old_bits == nil
|
80
|
-
p old_hit
|
81
|
-
raise 'hell'
|
82
|
-
end
|
83
|
-
if hit.bitscore > old_bits
|
84
|
-
hits[hit.query] = hit
|
85
|
-
elsif hit.bitscore == old_bits && hit.evalue < old_eval
|
86
|
-
hits[hit.query] = hit
|
87
|
-
end
|
88
|
-
else
|
89
|
-
hits[hit.query] = hit
|
90
|
-
end
|
91
|
-
end
|
92
|
-
hits
|
93
|
-
end
|
94
|
-
|
95
|
-
def load_results_file file
|
96
|
-
results = []
|
97
|
-
File.open(file).each_line do |line|
|
98
|
-
results << RBHit.new(line.chomp.split("\t"))
|
99
|
-
end
|
100
|
-
results
|
101
|
-
end
|
102
|
-
|
103
|
-
end # ReciprocalAnnotation
|
104
|
-
|
105
|
-
end # Transrate
|
data/lib/transrate/usearch.rb
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
module Transrate
|
2
|
-
|
3
|
-
class Usearch
|
4
|
-
|
5
|
-
require 'which'
|
6
|
-
include Which
|
7
|
-
|
8
|
-
def initialize threads=8
|
9
|
-
@threads = threads
|
10
|
-
paths = which('usearch')
|
11
|
-
if paths.empty?
|
12
|
-
raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
|
13
|
-
end
|
14
|
-
@cmd = paths.first
|
15
|
-
end
|
16
|
-
|
17
|
-
def custom_output_fields
|
18
|
-
" -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
|
19
|
-
end
|
20
|
-
|
21
|
-
def ublast query, target, evalue="1e-5"
|
22
|
-
blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
|
23
|
-
unless File.exists? blast6outfile
|
24
|
-
subcmd = " -ublast #{query}"
|
25
|
-
subcmd += " -db #{target}"
|
26
|
-
subcmd += " -evalue #{evalue}"
|
27
|
-
subcmd += " -userout #{blast6outfile}"
|
28
|
-
subcmd += self.custom_output_fields
|
29
|
-
subcmd += " -strand both"
|
30
|
-
subcmd += " -threads #{@threads}"
|
31
|
-
self.run subcmd
|
32
|
-
end
|
33
|
-
blast6outfile
|
34
|
-
end
|
35
|
-
|
36
|
-
def makeudb_ublast filepath, output
|
37
|
-
unless File.exists? output
|
38
|
-
subcmd = " -makeudb_ublast #{filepath}"
|
39
|
-
subcmd += " -output #{output}"
|
40
|
-
self.run subcmd
|
41
|
-
end
|
42
|
-
end
|
43
|
-
|
44
|
-
def findorfs filepath, output
|
45
|
-
if File.exists? output
|
46
|
-
puts "skipping ORF finding: ORF file already exists at #{output}"
|
47
|
-
else
|
48
|
-
subcmd = " -findorfs #{filepath}"
|
49
|
-
subcmd += " -output #{output}"
|
50
|
-
subcmd += " -xlat"
|
51
|
-
subcmd += " -orfstyle 7"
|
52
|
-
self.run subcmd
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
def run subcmd
|
57
|
-
subcmd += " -quiet"
|
58
|
-
ret = `#{@cmd}#{subcmd} 2>&1`
|
59
|
-
unless $?.exitstatus == 0
|
60
|
-
puts "usearch command failed: #{subcmd}\noutput:\n#{ret}"
|
61
|
-
end
|
62
|
-
end
|
63
|
-
|
64
|
-
end # Usearch
|
65
|
-
|
66
|
-
end # Transrate
|
data/test/test_test.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'helper'
|
4
|
-
|
5
|
-
class TestTransrate < Test::Unit::TestCase
|
6
|
-
|
7
|
-
context "transrate" do
|
8
|
-
|
9
|
-
setup do
|
10
|
-
@a = Transrate::Assembly.new("test/assembly.fasta")
|
11
|
-
@seq1 = "ATGCCCGGGTAG"
|
12
|
-
end
|
13
|
-
|
14
|
-
should "run metrics on assembly" do
|
15
|
-
ans = @a.run(2) # using 2 threads
|
16
|
-
assert_equal ans, true, "should run but returned #{ans}"
|
17
|
-
end
|
18
|
-
|
19
|
-
should "find longest orf" do
|
20
|
-
len = @a.orf_length("ATGCCCGGGTAG")
|
21
|
-
assert_equal len, 3, "expected 4 but got #{len}"
|
22
|
-
end
|
23
|
-
|
24
|
-
should "find longest orf in file" do
|
25
|
-
orfs = []
|
26
|
-
@a.assembly.each do |entry|
|
27
|
-
l = @a.orf_length entry.seq
|
28
|
-
orfs << l
|
29
|
-
end
|
30
|
-
assert_equal orfs.length, 4
|
31
|
-
assert_equal orfs, [333, 370, 131, 84]
|
32
|
-
end
|
33
|
-
|
34
|
-
should "find the mean length" do
|
35
|
-
ans = @a.run(2)
|
36
|
-
mean = @a.mean_len
|
37
|
-
assert_equal mean, 1508.25
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
end
|