transrate 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47)
  1. checksums.yaml +4 -4
  2. data/.gitignore +16 -1
  3. data/.travis.yml +8 -0
  4. data/README.md +45 -43
  5. data/Rakefile +36 -0
  6. data/bin/transrate +98 -50
  7. data/deps/deps.yaml +55 -0
  8. data/lib/transrate.rb +19 -4
  9. data/lib/transrate/assembly.rb +93 -182
  10. data/lib/transrate/bowtie2.rb +37 -13
  11. data/lib/transrate/cmd.rb +19 -0
  12. data/lib/transrate/comparative_metrics.rb +239 -19
  13. data/lib/transrate/contig.rb +212 -0
  14. data/lib/transrate/contig_metrics.rb +76 -0
  15. data/lib/transrate/read_metrics.rb +83 -41
  16. data/lib/transrate/samtools.rb +73 -0
  17. data/lib/transrate/transrater.rb +31 -11
  18. data/lib/transrate/version.rb +1 -1
  19. data/test/data/150uncovered.l.fq +892 -0
  20. data/test/data/150uncovered.r.fq +892 -0
  21. data/test/data/Os.protein.2.fa +95 -0
  22. data/test/data/Os.protein.fa +199 -0
  23. data/test/data/assembly.2.fa +26 -0
  24. data/test/{assembly.fasta → data/assembly.fasta} +0 -0
  25. data/test/data/bridging_reads.l.fastq +20 -0
  26. data/test/data/bridging_reads.r.fastq +20 -0
  27. data/test/data/sorghum_transcript.fa +4 -0
  28. data/test/data/tiny.sam +4 -0
  29. data/test/helper.rb +33 -2
  30. data/test/test_bowtie.rb +54 -0
  31. data/test/test_cmd.rb +15 -0
  32. data/test/test_comp_metrics.rb +177 -0
  33. data/test/test_contig.rb +61 -0
  34. data/test/test_contig_metrics.rb +50 -0
  35. data/test/test_inline.rb +10 -9
  36. data/test/test_read_metrics.rb +68 -0
  37. data/test/test_samtools.rb +22 -0
  38. data/test/test_transrate.rb +40 -0
  39. data/test/test_transrater.rb +68 -0
  40. data/transrate.gemspec +16 -10
  41. metadata +232 -57
  42. data/lib/transrate/express.rb +0 -37
  43. data/lib/transrate/log.rb +0 -16
  44. data/lib/transrate/rb_hit.rb +0 -33
  45. data/lib/transrate/reciprocal_annotation.rb +0 -105
  46. data/lib/transrate/usearch.rb +0 -66
  47. data/test/test_test.rb +0 -41
data/lib/transrate/express.rb DELETED
@@ -1,37 +0,0 @@
1
- module Transrate
2
-
3
- require 'which'
4
-
5
- class Express
6
-
7
- # return an Express object
8
- def initialize
9
- express_path = Which::which('express')
10
- raise "could not find eXpress in the path" if express_path.empty?
11
- @express = express_path.first
12
- end
13
-
14
- # return hash of expression for each sequenceID
15
- # in the assembly fastafile
16
- def quantify_expression assembly, samfile
17
- assembly = assembly.file if assembly.is_a? Assembly
18
- cmd = "#{@express} --no-bias-correct #{File.expand_path assembly} #{File.expand_path samfile}"
19
- ex_output = 'results.xprs'
20
- fin_output = "#{assembly}_#{ex_output}"
21
- unless File.exists? fin_output
22
- `#{cmd} 2>&1`.split(/\n/)[1..30].join("\n")
23
- File.rename(ex_output, fin_output)
24
- end
25
- expression = {}
26
- File.open(fin_output).each do |line|
27
- line = line.chomp.split("\t")
28
- target = line[1]
29
- effective_count = line[7]
30
- expression[target] = effective_count
31
- end
32
- expression
33
- end
34
-
35
- end # Express
36
-
37
- end # Transrate
data/lib/transrate/log.rb DELETED
@@ -1,16 +0,0 @@
1
- require 'logger'
2
-
3
- module Transrate
4
-
5
- class Log < Logger
6
-
7
- def dump_process?(status, output, msg)
8
- unless status.exitstatus == 0
9
- fatal(msg)
10
- fatal(output)
11
- end
12
- end
13
-
14
- end # Log
15
-
16
- end # Transrate
data/lib/transrate/rb_hit.rb DELETED
@@ -1,33 +0,0 @@
1
- module Transrate
2
-
3
- class RBHit
4
-
5
- # Fields: query id, subject id, % identity, alignment length, mismatches,
6
- # gap opens, q. start, q. end, s. start, s. end, evalue, bit score
7
- attr_accessor :query, :target, :id, :alnlen, :mismatches
8
- attr_accessor :gaps, :qstart, :qend, :tstart, :tend, :evalue
9
- attr_accessor :bitscore, :target_coverage
10
-
11
- def initialize(list)
12
- @query = list[0].scan(/[^|]+/).first.split.first # extract only identifier
13
- @target = list[1].scan(/[^|]+/).first.split.first
14
- @id = list[2]
15
- @alnlen = list[3]
16
- @mismatches = list[4]
17
- @gaps = list[5]
18
- @qstart = list[6]
19
- @qend = list[7]
20
- @tstart = list[8]
21
- @tend = list[9]
22
- @evalue = list[10]
23
- @bitscore = list[11]
24
- @target_coverage = list[12].to_i
25
- end
26
-
27
- def to_s
28
- @query + " => " + @target
29
- end
30
-
31
- end # RBHit
32
-
33
- end # Transrate
data/lib/transrate/reciprocal_annotation.rb DELETED
@@ -1,105 +0,0 @@
1
- module Transrate
2
-
3
- class ReciprocalAnnotation
4
-
5
- attr_reader :l2r_hits
6
- attr_reader :r2l_hits
7
- attr_reader :results
8
-
9
- def initialize assembly, reference
10
- @assembly = assembly
11
- @reference = reference
12
- @usearch = Usearch.new
13
- end
14
-
15
- def run
16
- self.make_assembly_db
17
- self.make_reference_db
18
- left2right, right2left = self.reciprocal_align
19
- self.parse_results left2right, right2left
20
- @results
21
- end
22
-
23
- def make_assembly_db
24
- unless @assembly.orfs_ublast_db
25
- assembly_dir = File.dirname(@assembly.file)
26
- assembly_base = File.basename(@assembly.file, ".*")
27
- assembly_orfs = assembly_base + ".orfs"
28
- @usearch.findorfs @assembly.file, assembly_orfs
29
- assembly_db = File.join(assembly_dir, assembly_base + ".udb")
30
- @usearch.makeudb_ublast assembly_orfs, assembly_db
31
- @assembly.orfs_ublast_db = assembly_db
32
- end
33
- end
34
-
35
- def make_reference_db
36
- unless @reference.ublast_db
37
- reference_dir = File.dirname(@reference.file)
38
- reference_base = File.basename(@reference.file, ".*")
39
- reference_db = File.join(reference_dir, reference_base + ".udb")
40
- @usearch.makeudb_ublast @reference.file, reference_db
41
- @reference.ublast_db = reference_db
42
- return reference_db
43
- end
44
- end
45
-
46
- def reciprocal_align
47
- left2right = @usearch.ublast @assembly.file, @reference.ublast_db
48
- right2left = @usearch.ublast @reference.file, @assembly.orfs_ublast_db
49
- [left2right, right2left]
50
- end
51
-
52
- def parse_results left2right, right2left
53
- l2r_results = self.load_results_file left2right
54
- r2l_results = self.load_results_file right2left
55
- @l2r_hits = self.results_to_hits l2r_results
56
- @r2l_hits = self.results_to_hits r2l_results
57
- @results = {}
58
- @l2r_hits.each_pair do |query, best|
59
- next if best.nil?
60
- tbest = @r2l_hits[best.target]
61
- next if tbest.nil?
62
- if query == tbest.target
63
- @results[query] = best
64
- end
65
- end
66
- end
67
-
68
- # what is this method trying to do? :/
69
- def results_to_hits results
70
- hits = {}
71
- results.each do |hit|
72
- if hits.has_key? hit.query
73
- old_hit = hits[hit.query]
74
- old_eval, old_bits = old_hit.evalue, old_hit.bitscore
75
- if hit.bitscore == nil
76
- p hit
77
- abort "oh noes"
78
- end
79
- if old_bits == nil
80
- p old_hit
81
- raise 'hell'
82
- end
83
- if hit.bitscore > old_bits
84
- hits[hit.query] = hit
85
- elsif hit.bitscore == old_bits && hit.evalue < old_eval
86
- hits[hit.query] = hit
87
- end
88
- else
89
- hits[hit.query] = hit
90
- end
91
- end
92
- hits
93
- end
94
-
95
- def load_results_file file
96
- results = []
97
- File.open(file).each_line do |line|
98
- results << RBHit.new(line.chomp.split("\t"))
99
- end
100
- results
101
- end
102
-
103
- end # ReciprocalAnnotation
104
-
105
- end # Transrate
data/lib/transrate/usearch.rb DELETED
@@ -1,66 +0,0 @@
1
- module Transrate
2
-
3
- class Usearch
4
-
5
- require 'which'
6
- include Which
7
-
8
- def initialize threads=8
9
- @threads = threads
10
- paths = which('usearch')
11
- if paths.empty?
12
- raise "usearch not found in path. Please ensure usearch is installed and aliased as 'usearch' in your path."
13
- end
14
- @cmd = paths.first
15
- end
16
-
17
- def custom_output_fields
18
- " -userfields query+target+id+alnlen+mism+opens+qlo+qhi+tlo+thi+evalue+bits+tcov"
19
- end
20
-
21
- def ublast query, target, evalue="1e-5"
22
- blast6outfile = "#{File.basename(query)}_#{File.basename(target)}.b6"
23
- unless File.exists? blast6outfile
24
- subcmd = " -ublast #{query}"
25
- subcmd += " -db #{target}"
26
- subcmd += " -evalue #{evalue}"
27
- subcmd += " -userout #{blast6outfile}"
28
- subcmd += self.custom_output_fields
29
- subcmd += " -strand both"
30
- subcmd += " -threads #{@threads}"
31
- self.run subcmd
32
- end
33
- blast6outfile
34
- end
35
-
36
- def makeudb_ublast filepath, output
37
- unless File.exists? output
38
- subcmd = " -makeudb_ublast #{filepath}"
39
- subcmd += " -output #{output}"
40
- self.run subcmd
41
- end
42
- end
43
-
44
- def findorfs filepath, output
45
- if File.exists? output
46
- puts "skipping ORF finding: ORF file already exists at #{output}"
47
- else
48
- subcmd = " -findorfs #{filepath}"
49
- subcmd += " -output #{output}"
50
- subcmd += " -xlat"
51
- subcmd += " -orfstyle 7"
52
- self.run subcmd
53
- end
54
- end
55
-
56
- def run subcmd
57
- subcmd += " -quiet"
58
- ret = `#{@cmd}#{subcmd} 2>&1`
59
- unless $?.exitstatus == 0
60
- puts "usearch command failed: #{subcmd}\noutput:\n#{ret}"
61
- end
62
- end
63
-
64
- end # Usearch
65
-
66
- end # Transrate
data/test/test_test.rb DELETED
@@ -1,41 +0,0 @@
1
- #!/usr/bin/env ruby
2
-
3
- require 'helper'
4
-
5
- class TestTransrate < Test::Unit::TestCase
6
-
7
- context "transrate" do
8
-
9
- setup do
10
- @a = Transrate::Assembly.new("test/assembly.fasta")
11
- @seq1 = "ATGCCCGGGTAG"
12
- end
13
-
14
- should "run metrics on assembly" do
15
- ans = @a.run(2) # using 2 threads
16
- assert_equal ans, true, "should run but returned #{ans}"
17
- end
18
-
19
- should "find longest orf" do
20
- len = @a.orf_length("ATGCCCGGGTAG")
21
- assert_equal len, 3, "expected 4 but got #{len}"
22
- end
23
-
24
- should "find longest orf in file" do
25
- orfs = []
26
- @a.assembly.each do |entry|
27
- l = @a.orf_length entry.seq
28
- orfs << l
29
- end
30
- assert_equal orfs.length, 4
31
- assert_equal orfs, [333, 370, 131, 84]
32
- end
33
-
34
- should "find the mean length" do
35
- ans = @a.run(2)
36
- mean = @a.mean_len
37
- assert_equal mean, 1508.25
38
- end
39
-
40
- end
41
- end