transrate 0.2.0 → 0.3.0
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
@@ -1,5 +1,4 @@
|
|
1
1
|
require 'set'
|
2
|
-
require 'inline'
|
3
2
|
|
4
3
|
module Transrate
|
5
4
|
|
@@ -41,7 +40,7 @@ module Transrate
|
|
41
40
|
cpg_count = 0
|
42
41
|
lc = 0
|
43
42
|
k = 6
|
44
|
-
@assembly.assembly.
|
43
|
+
@assembly.assembly.each_value do |contig|
|
45
44
|
total += contig.length
|
46
45
|
a += contig.bases_a
|
47
46
|
c += contig.bases_c
|
@@ -196,6 +196,8 @@ module Transrate
|
|
196
196
|
CSV.open('supported_bridges.csv', 'w') do |f|
|
197
197
|
@bridges.each_pair do |b, count|
|
198
198
|
start, finish = b.to_s.split('<>')
|
199
|
+
@assembly[start].in_bridges += 1
|
200
|
+
@assembly[finish].in_bridges += 1
|
199
201
|
if count > 1
|
200
202
|
f << [start, finish, count]
|
201
203
|
@supported_bridges += 1
|
@@ -215,15 +217,18 @@ module Transrate
|
|
215
217
|
n, tot_length, tot_coverage = 0, 0, 0
|
216
218
|
@assembly.each_with_coverage(bam) do |contig, coverage|
|
217
219
|
next if contig.length < 200
|
218
|
-
|
219
|
-
coverage.each
|
220
|
+
contig.uncovered_bases, total = 0, 0
|
221
|
+
coverage.each do |e|
|
222
|
+
total += e
|
223
|
+
contig.uncovered_bases += 1 if e < 1
|
224
|
+
end
|
220
225
|
tot_length += coverage.length
|
221
226
|
tot_coverage += total
|
222
|
-
|
223
|
-
@n_uncovered_bases +=
|
224
|
-
@n_uncovered_base_contigs += 1 if
|
225
|
-
@n_uncovered_contigs += 1 if
|
226
|
-
@n_lowcovered_contigs += 1 if
|
227
|
+
contig.mean_coverage = total / coverage.length.to_f
|
228
|
+
@n_uncovered_bases += contig.uncovered_bases
|
229
|
+
@n_uncovered_base_contigs += 1 if contig.uncovered_bases > 0
|
230
|
+
@n_uncovered_contigs += 1 if contig.mean_coverage < 1
|
231
|
+
@n_lowcovered_contigs += 1 if contig.mean_coverage < 10
|
227
232
|
end
|
228
233
|
@mean_coverage = (tot_coverage / tot_length.to_f).round(2)
|
229
234
|
@p_uncovered_bases = @n_uncovered_bases / @assembly.n_bases.to_f
|
@@ -236,3 +241,4 @@ module Transrate
|
|
236
241
|
end # ReadMetrics
|
237
242
|
|
238
243
|
end # Transrate
|
244
|
+
|
data/lib/transrate/version.rb
CHANGED
data/test/helper.rb
CHANGED
@@ -11,37 +11,7 @@ require 'minitest/autorun'
|
|
11
11
|
begin; require 'turn/autorun'; rescue LoadError; end
|
12
12
|
require 'shoulda/context'
|
13
13
|
require 'transrate'
|
14
|
+
require 'transrate/transrate.so'
|
14
15
|
|
15
16
|
Turn.config.format = :pretty
|
16
17
|
Turn.config.trace = 5
|
17
|
-
|
18
|
-
# fake CRBBlast class
|
19
|
-
class CRBHelper
|
20
|
-
|
21
|
-
attr_accessor :target_is_prot, :hash
|
22
|
-
def initialize t
|
23
|
-
@target_is_prot = t
|
24
|
-
end
|
25
|
-
|
26
|
-
def reciprocals
|
27
|
-
return @hash
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
# rake Hit class
|
33
|
-
class HitHelper
|
34
|
-
|
35
|
-
attr_accessor :query, :target, :qstart, :qend, :tstart, :tend, :qlen, :tlen
|
36
|
-
def initialize query, target, qstart, qend, tstart, tend, qlen, tlen
|
37
|
-
@query = query
|
38
|
-
@target = target
|
39
|
-
@qstart = qstart
|
40
|
-
@tstart = tstart
|
41
|
-
@tend = tend
|
42
|
-
@qend = qend
|
43
|
-
@qlen = qlen
|
44
|
-
@tlen = tlen
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
data/test/test_bin.rb
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
require 'helper'
|
2
|
+
require 'csv'
|
3
|
+
|
4
|
+
class TestTransrateBin < Test::Unit::TestCase
|
5
|
+
|
6
|
+
context "Transrate" do
|
7
|
+
|
8
|
+
setup do
|
9
|
+
|
10
|
+
end
|
11
|
+
|
12
|
+
teardown do
|
13
|
+
files = ["150uncovered.l.fq.150uncovered.r.fq.assembly.2.bai",
|
14
|
+
"150uncovered.l.fq.150uncovered.r.fq.assembly.2.bam",
|
15
|
+
"150uncovered.l.fq.150uncovered.r.fq.assembly.2.sam",
|
16
|
+
"150uncovered.l.fq.150uncovered.r.fq.assembly.2.sorted.bam",
|
17
|
+
"assembly.2.1.bt2", "assembly.2.2.bt2", "assembly.2.3.bt2",
|
18
|
+
"assembly.2.4.bt2", "assembly.2.fa.coverage",
|
19
|
+
"assembly.2_into_Os.protein.2.1.blast",
|
20
|
+
"assembly.2.nhr", "assembly.2.nin", "assembly.2.nsq",
|
21
|
+
"assembly.2.rev.1.bt2", "assembly.2.rev.2.bt2",
|
22
|
+
"Os.protein.2_into_assembly.2.2.blast",
|
23
|
+
"Os.protein.2.phr", "Os.protein.2.pin", "Os.protein.2.psq",
|
24
|
+
"supported_bridges.csv",
|
25
|
+
"transrate_assemblies.csv",
|
26
|
+
"transrate_contigs.csv"]
|
27
|
+
files.each do |file|
|
28
|
+
File.delete(file) if File.exist?(file)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
should "run help" do
|
33
|
+
c=Transrate::Cmd.new("bundle exec bin/transrate --help")
|
34
|
+
c.run
|
35
|
+
assert_equal 1751, c.stdout.length, "stdout"
|
36
|
+
assert_equal true, c.status.success?, "exit status"
|
37
|
+
end
|
38
|
+
|
39
|
+
should "fail on non existent assembly files" do
|
40
|
+
c=Transrate::Cmd.new("bundle exec bin/transrate --assembly foo.fasta")
|
41
|
+
c.run
|
42
|
+
assert_equal 163, c.stderr.length, "stderr"
|
43
|
+
assert_equal false, c.status.success?, "exit success"
|
44
|
+
end
|
45
|
+
|
46
|
+
should "fail on non existent reference files" do
|
47
|
+
c=Transrate::Cmd.new("bundle exec bin/transrate --reference foo.fasta")
|
48
|
+
c.run
|
49
|
+
assert_equal 104, c.stderr.length, "error"
|
50
|
+
assert_equal false, c.status.success?, "exit status"
|
51
|
+
end
|
52
|
+
|
53
|
+
should "run on test data" do
|
54
|
+
assembly = File.join(File.dirname(__FILE__), 'data', 'assembly.2.fa')
|
55
|
+
reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
|
56
|
+
left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
|
57
|
+
right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
|
58
|
+
cmd = "bundle exec bin/transrate --assembly #{assembly}"
|
59
|
+
cmd << " --reference #{reference}"
|
60
|
+
cmd << " --left #{left}"
|
61
|
+
cmd << " --right #{right}"
|
62
|
+
c = Transrate::Cmd.new("#{cmd}")
|
63
|
+
c.run
|
64
|
+
assert_equal true, c.status.success?, "exit status"
|
65
|
+
assert File.exist?("transrate_assemblies.csv"), "csv file doesn't exit"
|
66
|
+
assert File.exist?("transrate_contigs.csv"), "csv file doesn't exit"
|
67
|
+
hash = {}
|
68
|
+
CSV.foreach("transrate_assemblies.csv", :headers => true,
|
69
|
+
:header_converters => :symbol,
|
70
|
+
:converters => :all) do |row|
|
71
|
+
row.headers
|
72
|
+
row.fields
|
73
|
+
row.headers.zip(row.fields).each do |header, field|
|
74
|
+
hash[header]=field
|
75
|
+
end
|
76
|
+
end
|
77
|
+
assert_equal 10331, hash[:n_bases], "number of bases"
|
78
|
+
assert_equal 1566, hash[:n50], "n50"
|
79
|
+
assert_equal 10, hash[:n_refs_with_crbb], "number of crb hits"
|
80
|
+
end
|
81
|
+
|
82
|
+
should "fail when one of multiple assemblies is missing" do
|
83
|
+
assembly = File.join(File.dirname(__FILE__), 'data', 'assembly.2.fa')
|
84
|
+
assembly2 = File.join(File.dirname(__FILE__), 'data', 'foo.fa')
|
85
|
+
reference = File.join(File.dirname(__FILE__), 'data', 'Os.protein.2.fa')
|
86
|
+
left = File.join(File.dirname(__FILE__), 'data', '150uncovered.l.fq')
|
87
|
+
right = File.join(File.dirname(__FILE__), 'data', '150uncovered.r.fq')
|
88
|
+
cmd = "bundle exec bin/transrate "
|
89
|
+
cmd << " --assembly #{assembly},#{assembly2}"
|
90
|
+
cmd << " --reference #{reference}"
|
91
|
+
cmd << " --left #{left}"
|
92
|
+
cmd << " --right #{right}"
|
93
|
+
c = Transrate::Cmd.new("#{cmd}")
|
94
|
+
c.run
|
95
|
+
assert_equal false, c.status.success?, "exit status"
|
96
|
+
end
|
97
|
+
|
98
|
+
end
|
99
|
+
end
|
data/test/test_bowtie.rb
CHANGED
@@ -50,5 +50,17 @@ class TestBowtie < Test::Unit::TestCase
|
|
50
50
|
end
|
51
51
|
end
|
52
52
|
end
|
53
|
+
|
54
|
+
should "raise error when bowtie fails" do
|
55
|
+
not_reads = File.join(File.dirname(__FILE__), 'data', 'not_a_file.fq')
|
56
|
+
Dir.mktmpdir do |tmpdir|
|
57
|
+
Dir.chdir tmpdir do
|
58
|
+
assert_raise Transrate::Bowtie2Error do
|
59
|
+
@mapper.build_index @reference
|
60
|
+
@mapper.map_reads(@reference, @left, not_reads)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
53
65
|
end
|
54
66
|
end
|
data/test/test_comp_metrics.rb
CHANGED
@@ -1,5 +1,41 @@
|
|
1
1
|
require 'helper'
|
2
2
|
|
3
|
+
module CRB_Blast
|
4
|
+
class CRB_Blast
|
5
|
+
def change_hit(query_name, target_name, qstart, qend, tstart, tend, qlen, tlen)
|
6
|
+
hits = @reciprocals[query_name]
|
7
|
+
hits.each do |hit|
|
8
|
+
if hit.target == target_name
|
9
|
+
hit.qstart = qstart
|
10
|
+
hit.qend = qend
|
11
|
+
hit.tstart = tstart
|
12
|
+
hit.tend = tend
|
13
|
+
hit.qlen = qlen
|
14
|
+
hit.tlen = tlen
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
def add_hit(query_name, target_name, qstart, qend, tstart, tend, qlen, tlen)
|
20
|
+
@reciprocals[query_name] ||= []
|
21
|
+
list = Array.new(14)
|
22
|
+
list[0] = query_name
|
23
|
+
list[1] = target_name
|
24
|
+
list[6] = qstart
|
25
|
+
list[7] = qend
|
26
|
+
list[8] = tstart
|
27
|
+
list[9] = tend
|
28
|
+
list[12] = qlen
|
29
|
+
list[13] = tlen
|
30
|
+
@reciprocals[query_name] << Hit.new(list)
|
31
|
+
end
|
32
|
+
|
33
|
+
def remove_hit(query_name)
|
34
|
+
@reciprocals.delete(query_name)
|
35
|
+
end
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
3
39
|
class TestCompMetrics < Test::Unit::TestCase
|
4
40
|
|
5
41
|
context "ComparativeMetrics" do
|
@@ -11,13 +47,14 @@ class TestCompMetrics < Test::Unit::TestCase
|
|
11
47
|
targetpath = File.join(File.dirname(__FILE__),
|
12
48
|
'data',
|
13
49
|
'Os.protein.2.fa')
|
14
|
-
assembly = Transrate::Assembly.new(querypath)
|
15
|
-
|
50
|
+
@assembly = Transrate::Assembly.new(querypath)
|
51
|
+
@q_ids = @assembly.assembly.keys
|
52
|
+
@reference = Transrate::Assembly.new(targetpath)
|
53
|
+
@t_ids = @reference.assembly.keys
|
16
54
|
threads = 8
|
17
|
-
@comp = Transrate::ComparativeMetrics.new(assembly, reference, threads)
|
55
|
+
@comp = Transrate::ComparativeMetrics.new(@assembly, @reference, threads)
|
18
56
|
end
|
19
57
|
|
20
|
-
|
21
58
|
should "run metrics on assembly" do
|
22
59
|
Dir.mktmpdir do |tmpdir|
|
23
60
|
Dir.chdir tmpdir do
|
@@ -27,82 +64,127 @@ class TestCompMetrics < Test::Unit::TestCase
|
|
27
64
|
end
|
28
65
|
end
|
29
66
|
|
30
|
-
should "calculate
|
31
|
-
crb =
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
#
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
#
|
50
|
-
#
|
51
|
-
#
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
#
|
56
|
-
#
|
57
|
-
#
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
crb.
|
70
|
-
|
71
|
-
|
67
|
+
should "calculate reference coverage" do
|
68
|
+
crb = @comp.reciprocal_best_blast
|
69
|
+
# change the results so i know what i have
|
70
|
+
# qstart, qend, tstart, tend, qlen, tlen
|
71
|
+
#
|
72
|
+
# Q |------------|
|
73
|
+
# T1 |-------------------------|
|
74
|
+
crb.change_hit("scaf_Os03g60760.1", "LOC_Os03g60760.1",
|
75
|
+
1, 300, 101, 200, 300, 200) # 0.5
|
76
|
+
@reference["LOC_Os03g60760.1"].seq = "A"*200
|
77
|
+
#
|
78
|
+
# Q1 |----------|
|
79
|
+
# Q2 |------------|
|
80
|
+
# T2 |-------------------------------|
|
81
|
+
crb.change_hit("scaf_Os10g39590.1", "LOC_Os10g39590.1",
|
82
|
+
1, 150, 51, 100, 150, 200) # 0.25
|
83
|
+
crb.add_hit("scaf_Os10g39590.1", "LOC_Os10g39590.1",
|
84
|
+
1, 150, 151, 200, 150, 200) # 0.25
|
85
|
+
@reference["LOC_Os10g39590.1"].seq = "A"*200
|
86
|
+
#
|
87
|
+
# adding first block [151..300] scaf_Os09g38670.1
|
88
|
+
# 450 / 600.0
|
89
|
+
# LOC_Os09g38670.1 0.75
|
90
|
+
|
91
|
+
#
|
92
|
+
#
|
93
|
+
# Q1 |-----------|
|
94
|
+
# Q2 |----------------------|
|
95
|
+
# T3 |-------------------------------|
|
96
|
+
crb.change_hit("scaf_Os09g38670.1", "LOC_Os09g38670.1",
|
97
|
+
1, 150, 51, 100, 150, 200) # 0.25
|
98
|
+
crb.add_hit("scaf_Os09g38670.1", "LOC_Os09g38670.1",
|
99
|
+
1, 450, 26, 175, 450, 200) # 0.75
|
100
|
+
@reference["LOC_Os09g38670.1"].seq = "A"*200
|
101
|
+
|
102
|
+
#
|
103
|
+
# Q1 |----------------------|
|
104
|
+
# Q2 |-----------|
|
105
|
+
# T4 |-------------------------------|
|
106
|
+
crb.change_hit("scaf_Os12g21920.1", "LOC_Os12g21920.1", #
|
107
|
+
1, 450, 26, 175, 450, 200) # 0.75
|
108
|
+
crb.add_hit("scaf_Os12g21920.1", "LOC_Os12g21920.1",
|
109
|
+
1, 150, 51, 100, 150, 200) # 0.25
|
110
|
+
@reference["LOC_Os12g21920.1"].seq = "A"*200
|
111
|
+
|
112
|
+
|
113
|
+
#
|
114
|
+
# Q1 |------|
|
115
|
+
# Q2 |--------|
|
116
|
+
# Q3 |-----------------|
|
117
|
+
# T5 |-------------------------------|
|
118
|
+
crb.change_hit("scaf_Os01g36294.1", "LOC_Os01g36294.1", #
|
119
|
+
1, 300, 51, 100, 300, 400)
|
120
|
+
crb.add_hit("scaf_Os01g36294.1", "LOC_Os01g36294.1",
|
121
|
+
1, 300, 200, 250, 300, 400)
|
122
|
+
crb.add_hit("scaf_Os01g36294.1", "LOC_Os01g36294.1",
|
123
|
+
1, 300, 75, 225, 300, 400)
|
124
|
+
@reference["LOC_Os01g36294.1"].seq = "A"*400
|
125
|
+
|
126
|
+
crb.change_hit("scaf_Os12g22750.1", "LOC_Os12g22750.1",
|
127
|
+
1, 300, 101, 200, 300, 200) # 0.5 # 300/600
|
128
|
+
@reference["LOC_Os12g22750.1"].seq = "A"*200
|
129
|
+
|
130
|
+
crb.change_hit("scaf_Os02g55190.1", "LOC_Os02g55190.1",
|
131
|
+
1, 300, 101, 200, 300, 200) # 0.5 # 300/600
|
132
|
+
@reference["LOC_Os02g55190.1"].seq = "A"*200
|
133
|
+
|
134
|
+
crb.change_hit("scaf_Os03g56500.1", "LOC_Os03g56500.1",
|
135
|
+
1, 300, 101, 200, 300, 400) # 0.25
|
136
|
+
crb.change_hit("scaf_Os03g56500.2", "LOC_Os03g56500.1",
|
137
|
+
1, 300, 201, 300, 300, 400) # 0.25 # 600 / 1200
|
138
|
+
@reference["LOC_Os03g56500.1"].seq = "A"*400
|
139
|
+
|
140
|
+
crb.change_hit("scaf_Os03g56724.1", "LOC_Os03g56724.1",
|
141
|
+
1, 300, 101, 200, 300, 200) # 300/600 = 0.5
|
142
|
+
@reference["LOC_Os03g56724.1"].seq = "A"*200
|
143
|
+
|
144
|
+
crb.remove_hit("scaf_Os01g11360.1")
|
145
|
+
|
146
|
+
@reference["LOC_Os03g08270.3"].seq = "A"*200
|
147
|
+
@reference["LOC_Os10g41970.1"].seq = "A"*200
|
148
|
+
@reference["LOC_Os09g26780.1"].seq = "A"*200
|
149
|
+
@reference["LOC_Os12g24659.1"].seq = "A"*200
|
150
|
+
@reference["LOC_Os01g36410.1"].seq = "A"*200
|
151
|
+
@reference["LOC_Os12g22780.1"].seq = "A"*200
|
152
|
+
@reference["LOC_Os02g56470.1"].seq = "A"*200
|
153
|
+
@reference["LOC_Os03g30530.1"].seq = "A"*200
|
154
|
+
@reference["LOC_Os03g49850.1"].seq = "A"*200
|
155
|
+
@reference["LOC_Os01g11360.1"].seq = "A"*200
|
156
|
+
@reference["LOC_Os01g44140.1"].seq = "A"*200
|
157
|
+
|
158
|
+
# total_length of references should be 4400
|
159
|
+
|
160
|
+
cov = @comp.coverage crb
|
161
|
+
assert_equal 3600/4400.0, cov, "reference coverage"
|
72
162
|
end
|
73
163
|
|
74
164
|
should "calculate potential chimera count" do
|
75
|
-
crb =
|
165
|
+
crb = @comp.reciprocal_best_blast
|
166
|
+
# # T1 |---------|
|
167
|
+
# # T2 |---------|
|
168
|
+
# # Q1 |----------------------------| # chimera = true
|
169
|
+
|
170
|
+
crb.remove_hit("scaf_Os10g39590.1")
|
171
|
+
crb.add_hit("scaf_Os10g39590.1", "LOC_Os03g60760.1",
|
172
|
+
1, 150, 51, 100, 400, 60) # 0.25
|
173
|
+
crb.add_hit("scaf_Os10g39590.1", "LOC_Os10g39590.1",
|
174
|
+
200, 350, 51, 100, 400, 60) # 0.25
|
175
|
+
|
176
|
+
# # T3 |---------|
|
177
|
+
# # T3 |---------|
|
178
|
+
# # Q2 |----------------------------|
|
179
|
+
# # chimera = true because the reference has the region 1-100 duplicated
|
180
|
+
crb.remove_hit("scaf_Os12g21920.1")
|
181
|
+
crb.add_hit("scaf_Os12g21920.1", "LOC_Os12g21920.1",
|
182
|
+
1, 150, 55, 105, 400, 60) # 0.25
|
183
|
+
crb.add_hit("scaf_Os12g21920.1", "LOC_Os12g21920.1",
|
184
|
+
200, 350, 51, 100, 400, 60) # 0.25
|
76
185
|
|
77
|
-
hash = Hash.new
|
78
|
-
(1..3).each do |i|
|
79
|
-
hash["q#{i}"] = []
|
80
|
-
end
|
81
|
-
|
82
|
-
# T1 |---------|
|
83
|
-
# T2 |---------|
|
84
|
-
# Q1 |----------------------------| # chimera = true
|
85
|
-
hash["q1"] << HitHelper.new("q1", "t1", 101, 200, 1, 100, 500, 100)
|
86
|
-
hash["q1"] << HitHelper.new("q1", "t2", 301, 400, 1, 100, 400, 100)
|
87
|
-
|
88
|
-
|
89
|
-
# T3 |---------|
|
90
|
-
# T3 |---------|
|
91
|
-
# Q2 |----------------------------|
|
92
|
-
# chimera = true because the reference has the region 1-100 duplicated
|
93
|
-
hash["q2"] << HitHelper.new("q2", "t3", 101, 200, 1, 100, 500, 100)
|
94
|
-
hash["q2"] << HitHelper.new("q2", "t3", 301, 400, 1, 100, 400, 100)
|
95
|
-
|
96
|
-
# # T4 |---------|
|
97
|
-
# # T4 |---------|
|
98
|
-
# # Q3 |----------------------------|
|
99
|
-
# # chimera = false because the reference
|
100
|
-
hash["q3"] << HitHelper.new("q3", "t4", 101, 200, 1, 100, 500, 200)
|
101
|
-
hash["q3"] << HitHelper.new("q3", "t4", 301, 400, 101, 200, 400, 200)
|
102
|
-
|
103
|
-
crb.hash = hash
|
104
186
|
@comp.chimeras crb
|
105
|
-
assert_equal 0
|
187
|
+
assert_equal 2/11.0, @comp.p_chimeras
|
106
188
|
end
|
107
189
|
|
108
190
|
should "calculate overlap amount" do
|
@@ -116,8 +198,8 @@ class TestCompMetrics < Test::Unit::TestCase
|
|
116
198
|
Dir.mktmpdir do |tmpdir|
|
117
199
|
Dir.chdir tmpdir do
|
118
200
|
@comp.run
|
119
|
-
assert_equal 11, @comp.comp_stats[:
|
120
|
-
assert_equal 11/13.0, @comp.comp_stats[:
|
201
|
+
assert_equal 11, @comp.comp_stats[:n_contigs_with_CRBB]
|
202
|
+
assert_equal 11/13.0, @comp.comp_stats[:p_contigs_with_CRBB]
|
121
203
|
end
|
122
204
|
end
|
123
205
|
end
|
@@ -126,8 +208,8 @@ class TestCompMetrics < Test::Unit::TestCase
|
|
126
208
|
Dir.mktmpdir do |tmpdir|
|
127
209
|
Dir.chdir tmpdir do
|
128
210
|
@comp.run
|
129
|
-
assert_equal 10, @comp.comp_stats[:
|
130
|
-
assert_equal 0.5, @comp.comp_stats[:
|
211
|
+
assert_equal 10, @comp.comp_stats[:n_refs_with_CRBB]
|
212
|
+
assert_equal 0.5, @comp.comp_stats[:p_refs_with_CRBB]
|
131
213
|
end
|
132
214
|
end
|
133
215
|
end
|
@@ -148,30 +230,5 @@ class TestCompMetrics < Test::Unit::TestCase
|
|
148
230
|
end
|
149
231
|
end
|
150
232
|
|
151
|
-
should "number of reference sequences coverage" do
|
152
|
-
# n&p of reference sequences covered to (25, 50, 75, 85, 95%)
|
153
|
-
# of their length by CRB-BLAST hit
|
154
|
-
crb = CRBHelper.new(false)
|
155
|
-
|
156
|
-
hash = Hash.new
|
157
|
-
(1..5).each do |i|
|
158
|
-
hash["q#{i}"] = []
|
159
|
-
end
|
160
|
-
hash["q1"] << HitHelper.new("q1", "t1", 1, 250, 101, 350, 250, 1000)
|
161
|
-
hash["q2"] << HitHelper.new("q2", "t2", 1, 500, 101, 600, 500, 1000)
|
162
|
-
hash["q3"] << HitHelper.new("q3", "t3", 1, 750, 101, 850, 750, 1000)
|
163
|
-
hash["q4"] << HitHelper.new("q4", "t4", 1, 850, 101, 950, 850, 1000)
|
164
|
-
hash["q5"] << HitHelper.new("q5", "t5", 1, 950, 1, 950, 950, 1000)
|
165
|
-
|
166
|
-
crb.hash = hash
|
167
|
-
ohr = @comp.ortholog_hit_ratio crb
|
168
|
-
stats = @comp.comp_stats
|
169
|
-
assert_equal 5, stats[:cov25]
|
170
|
-
assert_equal 4, stats[:cov50]
|
171
|
-
assert_equal 3, stats[:cov75]
|
172
|
-
assert_equal 2, stats[:cov85]
|
173
|
-
assert_equal 1, stats[:cov95]
|
174
|
-
end
|
175
|
-
|
176
233
|
end
|
177
234
|
end
|