transrate 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +16 -1
  3. data/.travis.yml +8 -0
  4. data/README.md +45 -43
  5. data/Rakefile +36 -0
  6. data/bin/transrate +98 -50
  7. data/deps/deps.yaml +55 -0
  8. data/lib/transrate.rb +19 -4
  9. data/lib/transrate/assembly.rb +93 -182
  10. data/lib/transrate/bowtie2.rb +37 -13
  11. data/lib/transrate/cmd.rb +19 -0
  12. data/lib/transrate/comparative_metrics.rb +239 -19
  13. data/lib/transrate/contig.rb +212 -0
  14. data/lib/transrate/contig_metrics.rb +76 -0
  15. data/lib/transrate/read_metrics.rb +83 -41
  16. data/lib/transrate/samtools.rb +73 -0
  17. data/lib/transrate/transrater.rb +31 -11
  18. data/lib/transrate/version.rb +1 -1
  19. data/test/data/150uncovered.l.fq +892 -0
  20. data/test/data/150uncovered.r.fq +892 -0
  21. data/test/data/Os.protein.2.fa +95 -0
  22. data/test/data/Os.protein.fa +199 -0
  23. data/test/data/assembly.2.fa +26 -0
  24. data/test/{assembly.fasta → data/assembly.fasta} +0 -0
  25. data/test/data/bridging_reads.l.fastq +20 -0
  26. data/test/data/bridging_reads.r.fastq +20 -0
  27. data/test/data/sorghum_transcript.fa +4 -0
  28. data/test/data/tiny.sam +4 -0
  29. data/test/helper.rb +33 -2
  30. data/test/test_bowtie.rb +54 -0
  31. data/test/test_cmd.rb +15 -0
  32. data/test/test_comp_metrics.rb +177 -0
  33. data/test/test_contig.rb +61 -0
  34. data/test/test_contig_metrics.rb +50 -0
  35. data/test/test_inline.rb +10 -9
  36. data/test/test_read_metrics.rb +68 -0
  37. data/test/test_samtools.rb +22 -0
  38. data/test/test_transrate.rb +40 -0
  39. data/test/test_transrater.rb +68 -0
  40. data/transrate.gemspec +16 -10
  41. metadata +232 -57
  42. data/lib/transrate/express.rb +0 -37
  43. data/lib/transrate/log.rb +0 -16
  44. data/lib/transrate/rb_hit.rb +0 -33
  45. data/lib/transrate/reciprocal_annotation.rb +0 -105
  46. data/lib/transrate/usearch.rb +0 -66
  47. data/test/test_test.rb +0 -41
data/lib/transrate/express.rb DELETED
@@ -1,37 +0,0 @@
1
- module Transrate
2
-
3
- require 'which'
4
-
5
- class Express
6
-
7
- # return an Express object
8
- def initialize
9
- express_path = Which::which('express')
10
- raise "could not find eXpress in the path" if express_path.empty?
11
- @express = express_path.first
12
- end
13
-
14
- # return hash of expression for each sequenceID
15
- # in the assembly fastafile
16
- def quantify_expression assembly, samfile
17
- assembly = assembly.file if assembly.is_a? Assembly
18
- cmd = "#{@express} --no-bias-correct #{File.expand_path assembly} #{File.expand_path samfile}"
19
- ex_output = 'results.xprs'
20
- fin_output = "#{assembly}_#{ex_output}"
21
- unless File.exists? fin_output
22
- `#{cmd} 2>&1`.split(/\n/)[1..30].join("\n")
23
- File.rename(ex_output, fin_output)
24
- end
25
- expression = {}
26
- File.open(fin_output).each do |line|
27
- line = line.chomp.split("\t")
28
- target = line[1]
29
- effective_count = line[7]
30
- expression[target] = effective_count
31
- end
32
- expression
33
- end
34
-
35
- end # Express
36
-
37
- end # Transrate
data/lib/transrate/log.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'logger'
2
-
3
- module Transrate
4
-
5
- class Log < Logger
6
-
7
- def dump_process?(status, output, msg)
8
- unless status.exitstatus == 0
9
- fatal(msg)
10
- fatal(output)
11
- end
12
- end
13
-
14
- end # Log
15
-
16
- end # Transrate
data/lib/transrate/rb_hit.rb DELETED
@@ -1,33 +0,0 @@
1
- module Transrate
2
-
3
- class RBHit
4
-
5
- # Fields: query id, subject id, % identity, alignment length, mismatches,
6
- # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
7
- attr_accessor :query, :target, :id, :alnlen, :mismatches
8
- attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
9
- attr_accessor :bitscore, :target_coverage
10
-
11
- def initialize(list)
12
- @query = list[0].scan(/[^|]+/).first.split.first # extract only identifier
13
- @target = list[1].scan(/[^|]+/).first.split.first
14
- @id = list[2]
15
- @alnlen = list[3]
16
- @mismatches = list[4]
17
- @gaps = list[5]
18
- @qstart = list[6]
19
- @qend = list[7]
20
- @tstart = list[8]
21
- @tend = list[9]
22
- @evalue = list[10]
23
- @bitscore = list[11]
24
- @target_coverage = list[12].to_i
25
- end
26
-
27
- def to_s
28
- @query + " => " + @target
29
- end
30
-
31
- end # RBHit
32
-
33
- end # Transrate
data/lib/transrate/reciprocal_annotation.rb DELETED
@@ -1,105 +0,0 @@
1
- module Transrate
2
-
3
- class ReciprocalAnnotation
4
-
5
- attr_reader :l2r_hits
6
- attr_reader :r2l_hits
7
- attr_reader :results
8
-
9
- def initialize assembly, reference
10
- @assembly = assembly
11
- @reference = reference
12
- @usearch = Usearch.new
13
- end
14
-
15
- def run
16
- self.make_assembly_db
17
- self.make_reference_db
18
- left2right, right2left = self.reciprocal_align
19
- self.parse_results left2right, right2left
20
- @results
21
- end
22
-
23
- def make_assembly_db
24
- unless @assembly.orfs_ublast_db
25
- assembly_dir = File.dirname(@assembly.file)
26
- assembly_base = File.basename(@assembly.file, ".*")
27
- assembly_orfs = assembly_base + ".orfs"
28
- @usearch.findorfs @assembly.file, assembly_orfs
29
- assembly_db = File.join(assembly_dir, assembly_base + ".udb")
30
- @usearch.makeudb_ublast assembly_orfs, assembly_db
31
- @assembly.orfs_ublast_db = assembly_db
32
- end
33
- end
34
-
35
- def make_reference_db
36
- unless @reference.ublast_db
37
- reference_dir = File.dirname(@reference.file)
38
- reference_base = File.basename(@reference.file, ".*")
39
- reference_db = File.join(reference_dir, reference_base + ".udb")
40
- @usearch.makeudb_ublast @reference.file, reference_db
41
- @reference.ublast_db = reference_db
42
- return reference_db
43
- end
44
- end
45
-
46
- def reciprocal_align
47
- left2right = @usearch.ublast @assembly.file, @reference.ublast_db
48
- right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
49
- [left2right, right2left]
50
- end
51
-
52
- def parse_results left2right, right2left
53
- l2r_results = self.load_results_file left2right
54
- r2l_results = self.load_results_file right2left
55
- @l2r_hits = self.results_to_hits l2r_results
56
- @r2l_hits = self.results_to_hits r2l_results
57
- @results = {}
58
- @l2r_hits.each_pair do |query, best|
59
- next if best.nil?
60
- tbest = @r2l_hits[best.target]
61
- next if tbest.nil?
62
- if query == tbest.target
63
- @results[query] = best
64
- end
65
- end
66
- end
67
-
68
- # what is this method trying to do? :/
69
- def results_to_hits results
70
- hits = {}
71
- results.each do |hit|
72
- if hits.has_key? hit.query
73
- old_hit = hits[hit.query]
74
- old_eval, old_bits = old_hit.evalue, old_hit.bitscore
75
- if hit.bitscore == nil
76
- p hit
77
- abort "oh noes"
78
- end
79
- if old_bits == nil
80
- p old_hit
81
- raise 'hell'
82
- end
83
- if hit.bitscore > old_bits
84
- hits[hit.query] = hit
85
- elsif hit.bitscore == old_bits && hit.evalue < old_eval
86
- hits[hit.query] = hit
87
- end
88
- else
89
- hits[hit.query] = hit
90
- end
91
- end
92
- hits
93
- end
94
-
95
- def load_results_file file
96
- results = []
97
- File.open(file).each_line do |line|
98
- results << RBHit.new(line.chomp.split("\t"))
99
- end
100
- results
101
- end
102
-
103
- end # ReciprocalAnnotation
104
-
105
- end # Transrate
data/lib/transrate/usearch.rb DELETED
@@ -1,66 +0,0 @@
1
- module Transrate
2
-
3
- class Usearch
4
-
5
- require 'which'
6
- include Which
7
-
8
- def initialize threads=8
9
- @threads = threads
10
- paths = which('usearch')
11
- if paths.empty?
12
- raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
13
- end
14
- @cmd = paths.first
15
- end
16
-
17
- def custom_output_fields
18
- " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
19
- end
20
-
21
- def ublast query, target, evalue="1e-5"
22
- blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
23
- unless File.exists? blast6outfile
24
- subcmd = " -ublast #{query}"
25
- subcmd += " -db #{target}"
26
- subcmd += " -evalue #{evalue}"
27
- subcmd += " -userout #{blast6outfile}"
28
- subcmd += self.custom_output_fields
29
- subcmd += " -strand both"
30
- subcmd += " -threads #{@threads}"
31
- self.run subcmd
32
- end
33
- blast6outfile
34
- end
35
-
36
- def makeudb_ublast filepath, output
37
- unless File.exists? output
38
- subcmd = " -makeudb_ublast #{filepath}"
39
- subcmd += " -output #{output}"
40
- self.run subcmd
41
- end
42
- end
43
-
44
- def findorfs filepath, output
45
- if File.exists? output
46
- puts "skipping ORF finding: ORF file already exists at #{output}"
47
- else
48
- subcmd = " -findorfs #{filepath}"
49
- subcmd += " -output #{output}"
50
- subcmd += " -xlat"
51
- subcmd += " -orfstyle 7"
52
- self.run subcmd
53
- end
54
- end
55
-
56
- def run subcmd
57
- subcmd += " -quiet"
58
- ret = `#{@cmd}#{subcmd} 2>&1`
59
- unless $?.exitstatus == 0
60
- puts "usearch command failed: #{subcmd}\noutput:\n#{ret}"
61
- end
62
- end
63
-
64
- end # Usearch
65
-
66
- end # Transrate
data/test/test_test.rb DELETED
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'helper'
4
-
5
- class TestTransrate < Test::Unit::TestCase
6
-
7
- context "transrate" do
8
-
9
- setup do
10
- @a = Transrate::Assembly.new("test/assembly.fasta")
11
- @seq1 = "ATGCCCGGGTAG"
12
- end
13
-
14
- should "run metrics on assembly" do
15
- ans = @a.run(2) # using 2 threads
16
- assert_equal ans, true, "should run but returned #{ans}"
17
- end
18
-
19
- should "find longest orf" do
20
- len = @a.orf_length("ATGCCCGGGTAG")
21
- assert_equal len, 3, "expected 4 but got #{len}"
22
- end
23
-
24
- should "find longest orf in file" do
25
- orfs = []
26
- @a.assembly.each do |entry|
27
- l = @a.orf_length entry.seq
28
- orfs << l
29
- end
30
- assert_equal orfs.length, 4
31
- assert_equal orfs, [333, 370, 131, 84]
32
- end
33
-
34
- should "find the mean length" do
35
- ans = @a.run(2)
36
- mean = @a.mean_len
37
- assert_equal mean, 1508.25
38
- end
39
-
40
- end
41
- end