transrate 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
data/test/test_contig.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'helper'
|
2
2
|
require 'bio'
|
3
|
+
require 'benchmark'
|
3
4
|
|
4
5
|
class TestContig < Test::Unit::TestCase
|
5
6
|
|
6
7
|
context "Contig" do
|
7
8
|
|
8
9
|
setup do
|
9
|
-
seq = Bio::
|
10
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTATATACGCGTAG" # cg=3, gc=2, c*g=20
|
10
11
|
@contig = Transrate::Contig.new seq
|
11
12
|
end
|
12
13
|
|
@@ -23,6 +24,27 @@ class TestContig < Test::Unit::TestCase
|
|
23
24
|
assert_equal 0.0, @contig.prop_n, "proportion of base n"
|
24
25
|
end
|
25
26
|
|
27
|
+
should "calculate dibase composition with ambiguous bases" do
|
28
|
+
seq = Bio::FastaFormat.new ">test\nATGGGNCRYTAG"
|
29
|
+
contig = Transrate::Contig.new seq
|
30
|
+
assert_equal 1, contig.dibase_composition[:at]
|
31
|
+
assert_equal 1, contig.dibase_composition[:nn]
|
32
|
+
assert_equal 1, contig.dibase_composition[:gn]
|
33
|
+
end
|
34
|
+
|
35
|
+
should "benchmark composition" do
|
36
|
+
seq = "GCCGTGAGCTTCTTGATCGAGTTCTTCTCCCGCTTCGCGAACGCCTTGGACTCCTNGCACGGG"
|
37
|
+
seq << "GTCAGCCCCGCGATGTCGGCGGCCGCGGGCGGGGGG"
|
38
|
+
seq = seq * 100000
|
39
|
+
seq = Bio::FastaFormat.new ">test\n"+seq
|
40
|
+
contig = Transrate::Contig.new seq
|
41
|
+
ruby_time = 11 # time taken with the ruby version
|
42
|
+
c_time = Benchmark.realtime do |x|
|
43
|
+
contig.dibase_composition
|
44
|
+
end
|
45
|
+
assert c_time*100 < ruby_time, "c faster than ruby"
|
46
|
+
end
|
47
|
+
|
26
48
|
should "know how many of each two-base pair it contains" do
|
27
49
|
assert_equal 3, @contig.dibase_composition[:cg], "cg count"
|
28
50
|
assert_equal 3, @contig.dibase_composition[:at], "at count"
|
@@ -36,19 +58,53 @@ class TestContig < Test::Unit::TestCase
|
|
36
58
|
end
|
37
59
|
|
38
60
|
should "know its own base-pair skew" do
|
39
|
-
assert_equal 0.
|
40
|
-
assert_equal 0.
|
61
|
+
assert_equal 0.33, @contig.gc_skew.round(2), "gc skew"
|
62
|
+
assert_equal -0.09, @contig.at_skew.round(2), "at skew"
|
41
63
|
end
|
42
64
|
|
43
|
-
should "know its own CpG count and
|
44
|
-
assert_equal
|
45
|
-
assert_equal
|
65
|
+
should "know its own CpG count and ratio" do
|
66
|
+
assert_equal 5, @contig.cpg_count, "cpg count"
|
67
|
+
assert_equal 5.56, @contig.cpg_ratio.round(2), "cpg ratio"
|
68
|
+
end
|
69
|
+
|
70
|
+
should "calculate the CpG ratio" do
|
71
|
+
seq = Bio::FastaFormat.new ">test\nAAACGAAA"
|
72
|
+
contig = Transrate::Contig.new seq
|
73
|
+
assert_equal 8, contig.cpg_ratio, "cpg ratio"
|
46
74
|
end
|
47
75
|
|
48
76
|
should "know the length of its own longest orf" do
|
49
77
|
assert_equal 6, @contig.orf_length, "orf length"
|
50
78
|
end
|
51
79
|
|
80
|
+
should "not break when there is a null byte in the sequence" do
|
81
|
+
seq = Bio::FastaFormat.new ">test\nAAAAAAAAAAAA\0AAAAAAAAAAA"
|
82
|
+
contig = Transrate::Contig.new seq
|
83
|
+
assert_equal 7, contig.orf_length, "orf length"
|
84
|
+
end
|
85
|
+
|
86
|
+
should "not fail on bases that aren't ACGTN" do
|
87
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
|
88
|
+
contig = Transrate::Contig.new seq
|
89
|
+
assert_equal 1, contig.base_composition[:n]
|
90
|
+
end
|
91
|
+
|
92
|
+
should "get kmer count with non ACGTN bases" do
|
93
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
|
94
|
+
contig = Transrate::Contig.new seq
|
95
|
+
assert_equal 0, contig.kmer_count(6, "RRRRRRRRRRRRRRRR")
|
96
|
+
end
|
97
|
+
|
98
|
+
should "calculate linguistic complexity for a long sequence" do
|
99
|
+
alphabet = ["A", "C", "G", "T"]
|
100
|
+
seq = ""
|
101
|
+
50000.times do
|
102
|
+
seq << alphabet.sample
|
103
|
+
end
|
104
|
+
seq = Bio::FastaFormat.new ">test\n"+seq
|
105
|
+
contig = Transrate::Contig.new seq
|
106
|
+
assert contig.linguistic_complexity(6) > 0.98, "linguistic complexity"
|
107
|
+
end
|
52
108
|
|
53
109
|
should "know its own linguistic complexity" do
|
54
110
|
assert_equal 0.0586, @contig.linguistic_complexity(4).round(4),
|
data/test/test_contig_metrics.rb
CHANGED
@@ -33,12 +33,12 @@ class TestContigMetrics < Test::Unit::TestCase
|
|
33
33
|
|
34
34
|
should "get CpG density" do
|
35
35
|
@contig_metrics.run
|
36
|
-
assert_equal
|
36
|
+
assert_equal 1.51939, @contig_metrics.cpg_ratio.round(5)
|
37
37
|
end
|
38
38
|
|
39
39
|
should "get linguistic complexity" do
|
40
40
|
@contig_metrics.run
|
41
|
-
assert_equal 0.
|
41
|
+
assert_equal 0.26526, @contig_metrics.linguistic_complexity.round(5)
|
42
42
|
end
|
43
43
|
|
44
44
|
should "get the number and proportion of Ns" do
|
data/test/test_inline.rb
CHANGED
@@ -14,7 +14,7 @@ class TestInline < Test::Unit::TestCase
|
|
14
14
|
|
15
15
|
should 'find longest orf in file' do
|
16
16
|
orfs = []
|
17
|
-
@a.assembly.
|
17
|
+
@a.assembly.each_value do |contig|
|
18
18
|
orfs << contig.orf_length
|
19
19
|
end
|
20
20
|
assert_equal 4, orfs.length
|
@@ -22,7 +22,7 @@ class TestInline < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
should 'find longest orf in sequence' do
|
25
|
-
seq = Bio::
|
25
|
+
seq = Bio::FastaFormat.new ">test\nATGCCCCTAGGGTAG"
|
26
26
|
contig = Transrate::Contig.new seq
|
27
27
|
assert_equal 4, contig.orf_length
|
28
28
|
end
|
data/test/test_transrater.rb
CHANGED
@@ -59,7 +59,7 @@ class TestTransrater < Test::Unit::TestCase
|
|
59
59
|
Dir.chdir tmpdir do
|
60
60
|
all = @rater.all_metrics(@left, @right)
|
61
61
|
score = @rater.assembly_score
|
62
|
-
assert_equal 0.
|
62
|
+
assert_equal 0.55815, score.round(5) # regression test
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
data/transrate.gemspec
CHANGED
@@ -14,20 +14,21 @@ Gem::Specification.new do |gem|
|
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split("\n")
|
16
16
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
|
-
gem.require_paths = %w( lib )
|
17
|
+
gem.require_paths = %w( lib ext )
|
18
|
+
gem.extensions = ["ext/transrate/extconf.rb"]
|
18
19
|
|
19
20
|
gem.add_dependency 'yell', '~> 2.0', '>= 2.0.4'
|
20
21
|
gem.add_dependency 'trollop', '~> 2.0'
|
21
|
-
gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.
|
22
|
+
gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.8'
|
22
23
|
gem.add_dependency 'which', '~> 0.0', '>= 0.0.2'
|
23
24
|
gem.add_dependency 'bio', '~> 1.4', '>= 1.4.3'
|
24
25
|
gem.add_dependency 'bio-samtools', '~> 2.0', '>= 2.0.5'
|
25
|
-
gem.add_dependency 'RubyInline', '~> 3.12'
|
26
|
-
gem.add_dependency 'crb-blast', '~> 0.2'
|
26
|
+
gem.add_dependency 'crb-blast', '~> 0.4', '>= 0.4.0'
|
27
27
|
gem.add_dependency 'bettersam', '~> 0.0', '>= 0.0.3'
|
28
28
|
gem.add_dependency 'ruby-prof', '~> 0.15', '>= 0.15.1'
|
29
29
|
|
30
30
|
gem.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
|
31
|
+
gem.add_development_dependency 'rake-compiler', '~> 0.9', '>= 0.9.2'
|
31
32
|
gem.add_development_dependency 'turn', '~> 0.9', '>= 0.9.7'
|
32
33
|
gem.add_development_dependency 'minitest', '~> 4', '>= 4.7.5'
|
33
34
|
gem.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: transrate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Smith-Unna
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yell
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0.0'
|
55
55
|
- - '>='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version: 0.0.
|
57
|
+
version: 0.0.8
|
58
58
|
type: :runtime
|
59
59
|
prerelease: false
|
60
60
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -64,7 +64,7 @@ dependencies:
|
|
64
64
|
version: '0.0'
|
65
65
|
- - '>='
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: 0.0.
|
67
|
+
version: 0.0.8
|
68
68
|
- !ruby/object:Gem::Dependency
|
69
69
|
name: which
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,34 +125,26 @@ dependencies:
|
|
125
125
|
- - '>='
|
126
126
|
- !ruby/object:Gem::Version
|
127
127
|
version: 2.0.5
|
128
|
-
- !ruby/object:Gem::Dependency
|
129
|
-
name: RubyInline
|
130
|
-
requirement: !ruby/object:Gem::Requirement
|
131
|
-
requirements:
|
132
|
-
- - ~>
|
133
|
-
- !ruby/object:Gem::Version
|
134
|
-
version: '3.12'
|
135
|
-
type: :runtime
|
136
|
-
prerelease: false
|
137
|
-
version_requirements: !ruby/object:Gem::Requirement
|
138
|
-
requirements:
|
139
|
-
- - ~>
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: '3.12'
|
142
128
|
- !ruby/object:Gem::Dependency
|
143
129
|
name: crb-blast
|
144
130
|
requirement: !ruby/object:Gem::Requirement
|
145
131
|
requirements:
|
146
132
|
- - ~>
|
147
133
|
- !ruby/object:Gem::Version
|
148
|
-
version: '0.2'
|
134
|
+
version: '0.4'
|
135
|
+
- - '>='
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: 0.4.0
|
149
138
|
type: :runtime
|
150
139
|
prerelease: false
|
151
140
|
version_requirements: !ruby/object:Gem::Requirement
|
152
141
|
requirements:
|
153
142
|
- - ~>
|
154
143
|
- !ruby/object:Gem::Version
|
155
|
-
version: '0.2'
|
144
|
+
version: '0.4'
|
145
|
+
- - '>='
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: 0.4.0
|
156
148
|
- !ruby/object:Gem::Dependency
|
157
149
|
name: bettersam
|
158
150
|
requirement: !ruby/object:Gem::Requirement
|
@@ -213,6 +205,26 @@ dependencies:
|
|
213
205
|
- - '>='
|
214
206
|
- !ruby/object:Gem::Version
|
215
207
|
version: 10.3.2
|
208
|
+
- !ruby/object:Gem::Dependency
|
209
|
+
name: rake-compiler
|
210
|
+
requirement: !ruby/object:Gem::Requirement
|
211
|
+
requirements:
|
212
|
+
- - ~>
|
213
|
+
- !ruby/object:Gem::Version
|
214
|
+
version: '0.9'
|
215
|
+
- - '>='
|
216
|
+
- !ruby/object:Gem::Version
|
217
|
+
version: 0.9.2
|
218
|
+
type: :development
|
219
|
+
prerelease: false
|
220
|
+
version_requirements: !ruby/object:Gem::Requirement
|
221
|
+
requirements:
|
222
|
+
- - ~>
|
223
|
+
- !ruby/object:Gem::Version
|
224
|
+
version: '0.9'
|
225
|
+
- - '>='
|
226
|
+
- !ruby/object:Gem::Version
|
227
|
+
version: 0.9.2
|
216
228
|
- !ruby/object:Gem::Dependency
|
217
229
|
name: turn
|
218
230
|
requirement: !ruby/object:Gem::Requirement
|
@@ -312,7 +324,8 @@ description: ' a library and command-line tool for quality assessment of de-novo
|
|
312
324
|
email: rds45@cam.ac.uk
|
313
325
|
executables:
|
314
326
|
- transrate
|
315
|
-
extensions:
|
327
|
+
extensions:
|
328
|
+
- ext/transrate/extconf.rb
|
316
329
|
extra_rdoc_files: []
|
317
330
|
files:
|
318
331
|
- .gitignore
|
@@ -323,6 +336,9 @@ files:
|
|
323
336
|
- Rakefile
|
324
337
|
- bin/transrate
|
325
338
|
- deps/deps.yaml
|
339
|
+
- docs/transrate_logo_full.png
|
340
|
+
- ext/transrate/extconf.rb
|
341
|
+
- ext/transrate/transrate.c
|
326
342
|
- lib/transrate.rb
|
327
343
|
- lib/transrate/assembly.rb
|
328
344
|
- lib/transrate/bowtie2.rb
|
@@ -348,6 +364,7 @@ files:
|
|
348
364
|
- test/data/sorghum_transcript.fa
|
349
365
|
- test/data/tiny.sam
|
350
366
|
- test/helper.rb
|
367
|
+
- test/test_bin.rb
|
351
368
|
- test/test_bowtie.rb
|
352
369
|
- test/test_cmd.rb
|
353
370
|
- test/test_comp_metrics.rb
|
@@ -367,6 +384,7 @@ post_install_message:
|
|
367
384
|
rdoc_options: []
|
368
385
|
require_paths:
|
369
386
|
- lib
|
387
|
+
- ext
|
370
388
|
required_ruby_version: !ruby/object:Gem::Requirement
|
371
389
|
requirements:
|
372
390
|
- - '>='
|
@@ -379,7 +397,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
379
397
|
version: '0'
|
380
398
|
requirements: []
|
381
399
|
rubyforge_project:
|
382
|
-
rubygems_version: 2.
|
400
|
+
rubygems_version: 2.1.4
|
383
401
|
signing_key:
|
384
402
|
specification_version: 4
|
385
403
|
summary: quality assessment of de-novo transcriptome assemblies
|