transrate 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
data/test/test_contig.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
require 'helper'
|
2
2
|
require 'bio'
|
3
|
+
require 'benchmark'
|
3
4
|
|
4
5
|
class TestContig < Test::Unit::TestCase
|
5
6
|
|
6
7
|
context "Contig" do
|
7
8
|
|
8
9
|
setup do
|
9
|
-
seq = Bio::
|
10
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTATATACGCGTAG" # cg=3, gc=2, c*g=20
|
10
11
|
@contig = Transrate::Contig.new seq
|
11
12
|
end
|
12
13
|
|
@@ -23,6 +24,27 @@ class TestContig < Test::Unit::TestCase
|
|
23
24
|
assert_equal 0.0, @contig.prop_n, "proportion of base n"
|
24
25
|
end
|
25
26
|
|
27
|
+
should "calculate dibase composition with ambiguous bases" do
|
28
|
+
seq = Bio::FastaFormat.new ">test\nATGGGNCRYTAG"
|
29
|
+
contig = Transrate::Contig.new seq
|
30
|
+
assert_equal 1, contig.dibase_composition[:at]
|
31
|
+
assert_equal 1, contig.dibase_composition[:nn]
|
32
|
+
assert_equal 1, contig.dibase_composition[:gn]
|
33
|
+
end
|
34
|
+
|
35
|
+
should "benchmark composition" do
|
36
|
+
seq = "GCCGTGAGCTTCTTGATCGAGTTCTTCTCCCGCTTCGCGAACGCCTTGGACTCCTNGCACGGG"
|
37
|
+
seq << "GTCAGCCCCGCGATGTCGGCGGCCGCGGGCGGGGGG"
|
38
|
+
seq = seq * 100000
|
39
|
+
seq = Bio::FastaFormat.new ">test\n"+seq
|
40
|
+
contig = Transrate::Contig.new seq
|
41
|
+
ruby_time = 11 # time taken with the ruby version
|
42
|
+
c_time = Benchmark.realtime do |x|
|
43
|
+
contig.dibase_composition
|
44
|
+
end
|
45
|
+
assert c_time*100 < ruby_time, "c faster than ruby"
|
46
|
+
end
|
47
|
+
|
26
48
|
should "know how many of each two-base pair it contains" do
|
27
49
|
assert_equal 3, @contig.dibase_composition[:cg], "cg count"
|
28
50
|
assert_equal 3, @contig.dibase_composition[:at], "at count"
|
@@ -36,19 +58,53 @@ class TestContig < Test::Unit::TestCase
|
|
36
58
|
end
|
37
59
|
|
38
60
|
should "know its own base-pair skew" do
|
39
|
-
assert_equal 0.
|
40
|
-
assert_equal 0.
|
61
|
+
assert_equal 0.33, @contig.gc_skew.round(2), "gc skew"
|
62
|
+
assert_equal -0.09, @contig.at_skew.round(2), "at skew"
|
41
63
|
end
|
42
64
|
|
43
|
-
should "know its own CpG count and
|
44
|
-
assert_equal
|
45
|
-
assert_equal
|
65
|
+
should "know its own CpG count and ratio" do
|
66
|
+
assert_equal 5, @contig.cpg_count, "cpg count"
|
67
|
+
assert_equal 5.56, @contig.cpg_ratio.round(2), "cpg ratio"
|
68
|
+
end
|
69
|
+
|
70
|
+
should "calculate the CpG ratio" do
|
71
|
+
seq = Bio::FastaFormat.new ">test\nAAACGAAA"
|
72
|
+
contig = Transrate::Contig.new seq
|
73
|
+
assert_equal 8, contig.cpg_ratio, "cpg ratio"
|
46
74
|
end
|
47
75
|
|
48
76
|
should "know the length of its own longest orf" do
|
49
77
|
assert_equal 6, @contig.orf_length, "orf length"
|
50
78
|
end
|
51
79
|
|
80
|
+
should "not break when there is a null byte in the sequence" do
|
81
|
+
seq = Bio::FastaFormat.new ">test\nAAAAAAAAAAAA\0AAAAAAAAAAA"
|
82
|
+
contig = Transrate::Contig.new seq
|
83
|
+
assert_equal 7, contig.orf_length, "orf length"
|
84
|
+
end
|
85
|
+
|
86
|
+
should "not fail on bases that aren't ACGTN" do
|
87
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
|
88
|
+
contig = Transrate::Contig.new seq
|
89
|
+
assert_equal 1, contig.base_composition[:n]
|
90
|
+
end
|
91
|
+
|
92
|
+
should "get kmer count with non ACGTN bases" do
|
93
|
+
seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
|
94
|
+
contig = Transrate::Contig.new seq
|
95
|
+
assert_equal 0, contig.kmer_count(6, "RRRRRRRRRRRRRRRR")
|
96
|
+
end
|
97
|
+
|
98
|
+
should "calculate linguistic complexity for a long sequence" do
|
99
|
+
alphabet = ["A", "C", "G", "T"]
|
100
|
+
seq = ""
|
101
|
+
50000.times do
|
102
|
+
seq << alphabet.sample
|
103
|
+
end
|
104
|
+
seq = Bio::FastaFormat.new ">test\n"+seq
|
105
|
+
contig = Transrate::Contig.new seq
|
106
|
+
assert contig.linguistic_complexity(6) > 0.98, "linguistic complexity"
|
107
|
+
end
|
52
108
|
|
53
109
|
should "know its own linguistic complexity" do
|
54
110
|
assert_equal 0.0586, @contig.linguistic_complexity(4).round(4),
|
data/test/test_contig_metrics.rb
CHANGED
@@ -33,12 +33,12 @@ class TestContigMetrics < Test::Unit::TestCase
|
|
33
33
|
|
34
34
|
should "get CpG density" do
|
35
35
|
@contig_metrics.run
|
36
|
-
assert_equal
|
36
|
+
assert_equal 1.51939, @contig_metrics.cpg_ratio.round(5)
|
37
37
|
end
|
38
38
|
|
39
39
|
should "get linguistic complexity" do
|
40
40
|
@contig_metrics.run
|
41
|
-
assert_equal 0.
|
41
|
+
assert_equal 0.26526, @contig_metrics.linguistic_complexity.round(5)
|
42
42
|
end
|
43
43
|
|
44
44
|
should "get the number and proportion of Ns" do
|
data/test/test_inline.rb
CHANGED
@@ -14,7 +14,7 @@ class TestInline < Test::Unit::TestCase
|
|
14
14
|
|
15
15
|
should 'find longest orf in file' do
|
16
16
|
orfs = []
|
17
|
-
@a.assembly.
|
17
|
+
@a.assembly.each_value do |contig|
|
18
18
|
orfs << contig.orf_length
|
19
19
|
end
|
20
20
|
assert_equal 4, orfs.length
|
@@ -22,7 +22,7 @@ class TestInline < Test::Unit::TestCase
|
|
22
22
|
end
|
23
23
|
|
24
24
|
should 'find longest orf in sequence' do
|
25
|
-
seq = Bio::
|
25
|
+
seq = Bio::FastaFormat.new ">test\nATGCCCCTAGGGTAG"
|
26
26
|
contig = Transrate::Contig.new seq
|
27
27
|
assert_equal 4, contig.orf_length
|
28
28
|
end
|
data/test/test_transrater.rb
CHANGED
@@ -59,7 +59,7 @@ class TestTransrater < Test::Unit::TestCase
|
|
59
59
|
Dir.chdir tmpdir do
|
60
60
|
all = @rater.all_metrics(@left, @right)
|
61
61
|
score = @rater.assembly_score
|
62
|
-
assert_equal 0.
|
62
|
+
assert_equal 0.55815, score.round(5) # regression test
|
63
63
|
end
|
64
64
|
end
|
65
65
|
end
|
data/transrate.gemspec
CHANGED
@@ -14,20 +14,21 @@ Gem::Specification.new do |gem|
|
|
14
14
|
|
15
15
|
gem.files = `git ls-files`.split("\n")
|
16
16
|
gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
17
|
-
gem.require_paths = %w( lib )
|
17
|
+
gem.require_paths = %w( lib ext )
|
18
|
+
gem.extensions = ["ext/transrate/extconf.rb"]
|
18
19
|
|
19
20
|
gem.add_dependency 'yell', '~> 2.0', '>= 2.0.4'
|
20
21
|
gem.add_dependency 'trollop', '~> 2.0'
|
21
|
-
gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.
|
22
|
+
gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.8'
|
22
23
|
gem.add_dependency 'which', '~> 0.0', '>= 0.0.2'
|
23
24
|
gem.add_dependency 'bio', '~> 1.4', '>= 1.4.3'
|
24
25
|
gem.add_dependency 'bio-samtools', '~> 2.0', '>= 2.0.5'
|
25
|
-
gem.add_dependency '
|
26
|
-
gem.add_dependency 'crb-blast', '~> 0.2'
|
26
|
+
gem.add_dependency 'crb-blast', '~> 0.4', '>= 0.4.0'
|
27
27
|
gem.add_dependency 'bettersam', '~> 0.0', '>= 0.0.3'
|
28
28
|
gem.add_dependency 'ruby-prof', '~> 0.15', '>= 0.15.1'
|
29
29
|
|
30
30
|
gem.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
|
31
|
+
gem.add_development_dependency 'rake-compiler', '~> 0.9', '>= 0.9.2'
|
31
32
|
gem.add_development_dependency 'turn', '~> 0.9', '>= 0.9.7'
|
32
33
|
gem.add_development_dependency 'minitest', '~> 4', '>= 4.7.5'
|
33
34
|
gem.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: transrate
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Richard Smith-Unna
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2014-07-
|
12
|
+
date: 2014-07-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: yell
|
@@ -54,7 +54,7 @@ dependencies:
|
|
54
54
|
version: '0.0'
|
55
55
|
- - '>='
|
56
56
|
- !ruby/object:Gem::Version
|
57
|
-
version: 0.0.
|
57
|
+
version: 0.0.8
|
58
58
|
type: :runtime
|
59
59
|
prerelease: false
|
60
60
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -64,7 +64,7 @@ dependencies:
|
|
64
64
|
version: '0.0'
|
65
65
|
- - '>='
|
66
66
|
- !ruby/object:Gem::Version
|
67
|
-
version: 0.0.
|
67
|
+
version: 0.0.8
|
68
68
|
- !ruby/object:Gem::Dependency
|
69
69
|
name: which
|
70
70
|
requirement: !ruby/object:Gem::Requirement
|
@@ -125,34 +125,26 @@ dependencies:
|
|
125
125
|
- - '>='
|
126
126
|
- !ruby/object:Gem::Version
|
127
127
|
version: 2.0.5
|
128
|
-
- !ruby/object:Gem::Dependency
|
129
|
-
name: RubyInline
|
130
|
-
requirement: !ruby/object:Gem::Requirement
|
131
|
-
requirements:
|
132
|
-
- - ~>
|
133
|
-
- !ruby/object:Gem::Version
|
134
|
-
version: '3.12'
|
135
|
-
type: :runtime
|
136
|
-
prerelease: false
|
137
|
-
version_requirements: !ruby/object:Gem::Requirement
|
138
|
-
requirements:
|
139
|
-
- - ~>
|
140
|
-
- !ruby/object:Gem::Version
|
141
|
-
version: '3.12'
|
142
128
|
- !ruby/object:Gem::Dependency
|
143
129
|
name: crb-blast
|
144
130
|
requirement: !ruby/object:Gem::Requirement
|
145
131
|
requirements:
|
146
132
|
- - ~>
|
147
133
|
- !ruby/object:Gem::Version
|
148
|
-
version: '0.
|
134
|
+
version: '0.4'
|
135
|
+
- - '>='
|
136
|
+
- !ruby/object:Gem::Version
|
137
|
+
version: 0.4.0
|
149
138
|
type: :runtime
|
150
139
|
prerelease: false
|
151
140
|
version_requirements: !ruby/object:Gem::Requirement
|
152
141
|
requirements:
|
153
142
|
- - ~>
|
154
143
|
- !ruby/object:Gem::Version
|
155
|
-
version: '0.
|
144
|
+
version: '0.4'
|
145
|
+
- - '>='
|
146
|
+
- !ruby/object:Gem::Version
|
147
|
+
version: 0.4.0
|
156
148
|
- !ruby/object:Gem::Dependency
|
157
149
|
name: bettersam
|
158
150
|
requirement: !ruby/object:Gem::Requirement
|
@@ -213,6 +205,26 @@ dependencies:
|
|
213
205
|
- - '>='
|
214
206
|
- !ruby/object:Gem::Version
|
215
207
|
version: 10.3.2
|
208
|
+
- !ruby/object:Gem::Dependency
|
209
|
+
name: rake-compiler
|
210
|
+
requirement: !ruby/object:Gem::Requirement
|
211
|
+
requirements:
|
212
|
+
- - ~>
|
213
|
+
- !ruby/object:Gem::Version
|
214
|
+
version: '0.9'
|
215
|
+
- - '>='
|
216
|
+
- !ruby/object:Gem::Version
|
217
|
+
version: 0.9.2
|
218
|
+
type: :development
|
219
|
+
prerelease: false
|
220
|
+
version_requirements: !ruby/object:Gem::Requirement
|
221
|
+
requirements:
|
222
|
+
- - ~>
|
223
|
+
- !ruby/object:Gem::Version
|
224
|
+
version: '0.9'
|
225
|
+
- - '>='
|
226
|
+
- !ruby/object:Gem::Version
|
227
|
+
version: 0.9.2
|
216
228
|
- !ruby/object:Gem::Dependency
|
217
229
|
name: turn
|
218
230
|
requirement: !ruby/object:Gem::Requirement
|
@@ -312,7 +324,8 @@ description: ' a library and command-line tool for quality assessment of de-novo
|
|
312
324
|
email: rds45@cam.ac.uk
|
313
325
|
executables:
|
314
326
|
- transrate
|
315
|
-
extensions:
|
327
|
+
extensions:
|
328
|
+
- ext/transrate/extconf.rb
|
316
329
|
extra_rdoc_files: []
|
317
330
|
files:
|
318
331
|
- .gitignore
|
@@ -323,6 +336,9 @@ files:
|
|
323
336
|
- Rakefile
|
324
337
|
- bin/transrate
|
325
338
|
- deps/deps.yaml
|
339
|
+
- docs/transrate_logo_full.png
|
340
|
+
- ext/transrate/extconf.rb
|
341
|
+
- ext/transrate/transrate.c
|
326
342
|
- lib/transrate.rb
|
327
343
|
- lib/transrate/assembly.rb
|
328
344
|
- lib/transrate/bowtie2.rb
|
@@ -348,6 +364,7 @@ files:
|
|
348
364
|
- test/data/sorghum_transcript.fa
|
349
365
|
- test/data/tiny.sam
|
350
366
|
- test/helper.rb
|
367
|
+
- test/test_bin.rb
|
351
368
|
- test/test_bowtie.rb
|
352
369
|
- test/test_cmd.rb
|
353
370
|
- test/test_comp_metrics.rb
|
@@ -367,6 +384,7 @@ post_install_message:
|
|
367
384
|
rdoc_options: []
|
368
385
|
require_paths:
|
369
386
|
- lib
|
387
|
+
- ext
|
370
388
|
required_ruby_version: !ruby/object:Gem::Requirement
|
371
389
|
requirements:
|
372
390
|
- - '>='
|
@@ -379,7 +397,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
379
397
|
version: '0'
|
380
398
|
requirements: []
|
381
399
|
rubyforge_project:
|
382
|
-
rubygems_version: 2.
|
400
|
+
rubygems_version: 2.1.4
|
383
401
|
signing_key:
|
384
402
|
specification_version: 4
|
385
403
|
summary: quality assessment of de-novo transcriptome assemblies
|