transrate 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +16 -1
- data/.travis.yml +8 -0
- data/README.md +45 -43
- data/Rakefile +36 -0
- data/bin/transrate +98 -50
- data/deps/deps.yaml +55 -0
- data/lib/transrate.rb +19 -4
- data/lib/transrate/assembly.rb +93 -182
- data/lib/transrate/bowtie2.rb +37 -13
- data/lib/transrate/cmd.rb +19 -0
- data/lib/transrate/comparative_metrics.rb +239 -19
- data/lib/transrate/contig.rb +212 -0
- data/lib/transrate/contig_metrics.rb +76 -0
- data/lib/transrate/read_metrics.rb +83 -41
- data/lib/transrate/samtools.rb +73 -0
- data/lib/transrate/transrater.rb +31 -11
- data/lib/transrate/version.rb +1 -1
- data/test/data/150uncovered.l.fq +892 -0
- data/test/data/150uncovered.r.fq +892 -0
- data/test/data/Os.protein.2.fa +95 -0
- data/test/data/Os.protein.fa +199 -0
- data/test/data/assembly.2.fa +26 -0
- data/test/{assembly.fasta → data/assembly.fasta} +0 -0
- data/test/data/bridging_reads.l.fastq +20 -0
- data/test/data/bridging_reads.r.fastq +20 -0
- data/test/data/sorghum_transcript.fa +4 -0
- data/test/data/tiny.sam +4 -0
- data/test/helper.rb +33 -2
- data/test/test_bowtie.rb +54 -0
- data/test/test_cmd.rb +15 -0
- data/test/test_comp_metrics.rb +177 -0
- data/test/test_contig.rb +61 -0
- data/test/test_contig_metrics.rb +50 -0
- data/test/test_inline.rb +10 -9
- data/test/test_read_metrics.rb +68 -0
- data/test/test_samtools.rb +22 -0
- data/test/test_transrate.rb +40 -0
- data/test/test_transrater.rb +68 -0
- data/transrate.gemspec +16 -10
- metadata +232 -57
- data/lib/transrate/express.rb +0 -37
- data/lib/transrate/log.rb +0 -16
- data/lib/transrate/rb_hit.rb +0 -33
- data/lib/transrate/reciprocal_annotation.rb +0 -105
- data/lib/transrate/usearch.rb +0 -66
- data/test/test_test.rb +0 -41
data/lib/transrate/express.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
module Transrate

  require 'open3'
  require 'which'

  # Wrapper around the eXpress command-line tool: runs it on an assembly and
  # a SAM file, then reads the resulting table into a hash.
  class Express

    # Locate the 'express' executable using Which.
    #
    # Raises RuntimeError if no such executable is found.
    def initialize
      express_path = Which::which('express')
      raise "could not find eXpress in the path" if express_path.empty?
      @express = express_path.first
    end

    # Return a hash of expression for each sequence ID in the assembly
    # fasta file.
    #
    # assembly - path to the assembly fasta file, or an Assembly (in which
    #            case its #file is used).
    # samfile  - path to the SAM file handed to eXpress.
    #
    # eXpress is only run when "<assembly>_results.xprs" does not exist yet;
    # otherwise the existing file is reused as-is.
    #
    # Every line of the results file is parsed, mapping column 1 to column 7
    # (zero-based, tab-separated). Values are left as strings.
    # NOTE(review): a header line, if present, ends up in the hash as well;
    # this matches the previous behaviour - confirm whether it is intended.
    #
    # Raises RuntimeError if eXpress exits unsuccessfully.
    def quantify_expression assembly, samfile
      assembly = assembly.file if assembly.is_a? Assembly
      ex_output = 'results.xprs'
      fin_output = "#{assembly}_#{ex_output}"
      unless File.exist? fin_output
        run_express assembly, samfile
        # eXpress output is picked up from the current working directory
        File.rename(ex_output, fin_output)
      end
      expression = {}
      # File.foreach closes the file once iteration has finished
      File.foreach(fin_output) do |line|
        fields = line.chomp.split("\t")
        expression[fields[1]] = fields[7]
      end
      expression
    end

    private

    # Run eXpress without going through a shell, so that paths containing
    # spaces or shell metacharacters are passed through unchanged.
    # stdout and stderr are captured together for the error message.
    def run_express assembly, samfile
      output, status = Open3.capture2e(@express,
                                       '--no-bias-correct',
                                       File.expand_path(assembly),
                                       File.expand_path(samfile))
      return if status.success?
      raise "eXpress failed (#{status}):\n#{output}"
    end

  end # Express

end # Transrate
|
data/lib/transrate/log.rb
DELETED
data/lib/transrate/rb_hit.rb
DELETED
@@ -1,33 +0,0 @@
|
|
1
|
-
module Transrate

  # One alignment hit, built from the fields of a single tab-separated
  # results row.
  class RBHit

    # Fields: query id, subject id, % identity, alignment length, mismatches,
    # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
    attr_accessor :query, :target, :id, :alnlen, :mismatches
    attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
    attr_accessor :bitscore, :target_coverage

    # list - the row's fields in the order given above, followed by the
    #        target coverage at index 12.
    #
    # Query and target are reduced to their identifier (see
    # #extract_identifier). All other fields are stored exactly as given,
    # except target coverage, which is converted with #to_i.
    #
    # Raises ArgumentError if the query or target field is missing or
    # contains no identifier (for example when built from a blank line).
    def initialize(list)
      @query = extract_identifier(list[0], 'query')
      @target = extract_identifier(list[1], 'target')
      @id = list[2]
      @alnlen = list[3]
      @mismatches = list[4]
      @gaps = list[5]
      @qstart = list[6]
      @qend = list[7]
      @tstart = list[8]
      @tend = list[9]
      @evalue = list[10]
      @bitscore = list[11]
      @target_coverage = list[12].to_i
    end

    # Human-readable "query => target" summary of the hit.
    def to_s
      "#{@query} => #{@target}"
    end

    private

    # Extract only the identifier from a raw id field: the text before the
    # first '|' (skipping leading pipes), cut at the first whitespace.
    def extract_identifier(field, label)
      token = field.to_s[/[^|]+/]
      if token.nil?
        raise ArgumentError, "hit row has no #{label} id: #{field.inspect}"
      end
      token.split.first
    end

  end # RBHit

end # Transrate
|
data/lib/transrate/reciprocal_annotation.rb
DELETED
@@ -1,105 +0,0 @@
|
|
1
|
-
module Transrate

  # Finds reciprocal best hits between an assembly and a reference by
  # aligning each against the other with Usearch and keeping only the pairs
  # that are each other's best hit.
  class ReciprocalAnnotation

    attr_reader :l2r_hits
    attr_reader :r2l_hits
    attr_reader :results

    # assembly  - object responding to #file and #orfs_ublast_db(=)
    # reference - object responding to #file and #ublast_db(=)
    def initialize assembly, reference
      @assembly = assembly
      @reference = reference
      @usearch = Usearch.new
    end

    # Build both databases, align in both directions and return the hash of
    # reciprocal best hits (query id => best hit).
    def run
      make_assembly_db
      make_reference_db
      left2right, right2left = reciprocal_align
      parse_results left2right, right2left
      @results
    end

    # Find ORFs in the assembly and build a ublast database from them,
    # unless the assembly already has one. Returns the database path when
    # one was built, nil otherwise.
    def make_assembly_db
      return if @assembly.orfs_ublast_db
      assembly_dir = File.dirname(@assembly.file)
      assembly_base = File.basename(@assembly.file, ".*")
      # the ORF file name carries no directory, unlike the database path
      assembly_orfs = assembly_base + ".orfs"
      @usearch.findorfs @assembly.file, assembly_orfs
      assembly_db = File.join(assembly_dir, assembly_base + ".udb")
      @usearch.makeudb_ublast assembly_orfs, assembly_db
      @assembly.orfs_ublast_db = assembly_db
    end

    # Build a ublast database from the reference, unless it already has
    # one. Returns the database path when one was built, nil otherwise.
    def make_reference_db
      return if @reference.ublast_db
      reference_dir = File.dirname(@reference.file)
      reference_base = File.basename(@reference.file, ".*")
      reference_db = File.join(reference_dir, reference_base + ".udb")
      @usearch.makeudb_ublast @reference.file, reference_db
      @reference.ublast_db = reference_db
    end

    # Align assembly against reference and reference against assembly ORFs.
    # Returns the two result file paths as [left2right, right2left].
    def reciprocal_align
      left2right = @usearch.ublast @assembly.file, @reference.ublast_db
      right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
      [left2right, right2left]
    end

    # Load both result files, reduce each to the best hit per query and
    # store in @results every left-to-right best hit whose target's own
    # best hit points back at the same query.
    def parse_results left2right, right2left
      l2r_results = load_results_file left2right
      r2l_results = load_results_file right2left
      @l2r_hits = results_to_hits l2r_results
      @r2l_hits = results_to_hits r2l_results
      @results = {}
      @l2r_hits.each_pair do |query, best|
        next if best.nil?
        tbest = @r2l_hits[best.target]
        next if tbest.nil?
        @results[query] = best if query == tbest.target
      end
    end

    # Reduce a list of hits to a hash of query id => best hit for that
    # query. The best hit is the one with the highest bitscore; ties are
    # broken by the lowest evalue. On a complete tie the earlier hit wins.
    #
    # Scores are compared as numbers: comparing the raw strings would order
    # them lexicographically ("100" < "99.5").
    #
    # Raises ArgumentError if a score that needs comparing is not numeric.
    def results_to_hits results
      results.each_with_object({}) do |hit, hits|
        current = hits[hit.query]
        hits[hit.query] = hit if current.nil? || better_hit?(hit, current)
      end
    end

    # Parse a tab-separated results file into an array of RBHit, one per
    # non-blank line.
    def load_results_file file
      results = []
      # File.foreach closes the file once iteration has finished
      File.foreach(file) do |line|
        row = line.chomp
        next if row.strip.empty?
        results << RBHit.new(row.split("\t"))
      end
      results
    end

    private

    # True if candidate should replace incumbent as the best hit.
    def better_hit? candidate, incumbent
      new_bits = numeric_field(candidate, :bitscore)
      old_bits = numeric_field(incumbent, :bitscore)
      return true if new_bits > old_bits
      return false if new_bits < old_bits
      numeric_field(candidate, :evalue) < numeric_field(incumbent, :evalue)
    end

    # Read a score from a hit as a Float, whether it is stored as a string
    # or already as a number.
    def numeric_field hit, field
      Float(hit.public_send(field))
    rescue ArgumentError, TypeError
      value = hit.public_send(field).inspect
      raise ArgumentError, "hit #{hit} has a non-numeric #{field}: #{value}"
    end

  end # ReciprocalAnnotation

end # Transrate
|
data/lib/transrate/usearch.rb
DELETED
@@ -1,66 +0,0 @@
|
|
1
|
-
module Transrate

  # Wrapper around the usearch command-line tool.
  class Usearch

    require 'shellwords'
    require 'which'
    include Which

    # threads - number of threads passed to usearch for ublast searches.
    #
    # Raises RuntimeError if no 'usearch' executable is found.
    def initialize threads=8
      @threads = threads
      paths = which('usearch')
      if paths.empty?
        raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
      end
      @cmd = paths.first
    end

    # Command-line fragment selecting the output columns of a ublast run.
    def custom_output_fields
      " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
    end

    # Search query against the database target and return the name of the
    # output file, "<query basename>_<target basename>.b6" (no directory
    # component). The search is skipped when that file already exists.
    def ublast query, target, evalue="1e-5"
      blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
      unless File.exist? blast6outfile
        # user-supplied values are shell-escaped so that paths containing
        # spaces or metacharacters stay single arguments
        subcmd = " -ublast #{Shellwords.escape(query)}"
        subcmd += " -db #{Shellwords.escape(target)}"
        subcmd += " -evalue #{Shellwords.escape(evalue.to_s)}"
        subcmd += " -userout #{Shellwords.escape(blast6outfile)}"
        subcmd += custom_output_fields
        subcmd += " -strand both"
        subcmd += " -threads #{@threads}"
        run subcmd
      end
      blast6outfile
    end

    # Build a ublast database at output from filepath, unless output
    # already exists.
    def makeudb_ublast filepath, output
      return if File.exist? output
      subcmd = " -makeudb_ublast #{Shellwords.escape(filepath)}"
      subcmd += " -output #{Shellwords.escape(output)}"
      run subcmd
    end

    # Find ORFs in filepath and write them to output, unless output already
    # exists (in which case a notice is printed instead).
    def findorfs filepath, output
      if File.exist? output
        puts "skipping ORF finding: ORF file already exists at #{output}"
      else
        subcmd = " -findorfs #{Shellwords.escape(filepath)}"
        subcmd += " -output #{Shellwords.escape(output)}"
        subcmd += " -xlat"
        subcmd += " -orfstyle 7"
        run subcmd
      end
    end

    # Run usearch with the given argument string (which must start with a
    # space) plus -quiet, capturing stdout and stderr together.
    #
    # A failure is reported on stderr rather than raised. Returns a truthy
    # value when the command succeeded and a falsy one otherwise, so callers
    # can detect failure.
    def run subcmd
      subcmd += " -quiet"
      ret = `#{Shellwords.escape(@cmd)}#{subcmd} 2>&1`
      succeeded = $?.success?
      unless succeeded
        warn "usearch command failed: #{subcmd}\noutput:\n#{ret}"
      end
      succeeded
    end

  end # Usearch

end # Transrate
|
data/test/test_test.rb
DELETED
@@ -1,41 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
require 'helper'
|
4
|
-
|
5
|
-
# Tests for Transrate::Assembly: running the metrics, ORF length detection
# and mean contig length, using the fixture at test/assembly.fasta.
class TestTransrate < Test::Unit::TestCase

  context "transrate" do

    setup do
      @a = Transrate::Assembly.new("test/assembly.fasta")
      @seq1 = "ATGCCCGGGTAG"
    end

    should "run metrics on assembly" do
      ans = @a.run(2) # using 2 threads
      # assert_equal takes the expected value first
      assert_equal true, ans, "should run but returned #{ans}"
    end

    should "find longest orf" do
      len = @a.orf_length(@seq1)
      assert_equal 3, len, "expected 3 but got #{len}"
    end

    should "find longest orf in file" do
      orfs = []
      @a.assembly.each do |entry|
        orfs << @a.orf_length(entry.seq)
      end
      assert_equal 4, orfs.length
      assert_equal [333, 370, 131, 84], orfs
    end

    should "find the mean length" do
      @a.run(2) # mean_len is read after the metrics have been run
      assert_equal 1508.25, @a.mean_len
    end

  end
end
|