transrate 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/test/test_contig.rb CHANGED
@@ -1,12 +1,13 @@
1
1
  require 'helper'
2
2
  require 'bio'
3
+ require 'benchmark'
3
4
 
4
5
  class TestContig < Test::Unit::TestCase
5
6
 
6
7
  context "Contig" do
7
8
 
8
9
  setup do
9
- seq = Bio::Sequence.new 'ATGCGTGTATATACGCGTAG'
10
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTATATACGCGTAG" # cg=3, gc=2, c=3, g=6 (c*g=18), length=20
10
11
  @contig = Transrate::Contig.new seq
11
12
  end
12
13
 
@@ -23,6 +24,27 @@ class TestContig < Test::Unit::TestCase
23
24
  assert_equal 0.0, @contig.prop_n, "proportion of base n"
24
25
  end
25
26
 
27
+ should "calculate dibase composition with ambiguous bases" do
28
+ seq = Bio::FastaFormat.new ">test\nATGGGNCRYTAG"
29
+ contig = Transrate::Contig.new seq
30
+ assert_equal 1, contig.dibase_composition[:at]
31
+ assert_equal 1, contig.dibase_composition[:nn]
32
+ assert_equal 1, contig.dibase_composition[:gn]
33
+ end
34
+
35
+ should "benchmark composition" do
36
+ seq = "GCCGTGAGCTTCTTGATCGAGTTCTTCTCCCGCTTCGCGAACGCCTTGGACTCCTNGCACGGG"
37
+ seq << "GTCAGCCCCGCGATGTCGGCGGCCGCGGGCGGGGGG"
38
+ seq = seq * 100000
39
+ seq = Bio::FastaFormat.new ">test\n"+seq
40
+ contig = Transrate::Contig.new seq
41
+ ruby_time = 11 # seconds taken by the pure-Ruby version
42
+ c_time = Benchmark.realtime do |x|
43
+ contig.dibase_composition
44
+ end
45
+ assert c_time*100 < ruby_time, "c faster than ruby"
46
+ end
47
+
26
48
  should "know how many of each two-base pair it contains" do
27
49
  assert_equal 3, @contig.dibase_composition[:cg], "cg count"
28
50
  assert_equal 3, @contig.dibase_composition[:at], "at count"
@@ -36,19 +58,53 @@ class TestContig < Test::Unit::TestCase
36
58
  end
37
59
 
38
60
  should "know its own base-pair skew" do
39
- assert_equal 0.45, @contig.gc_skew.round(2), "gc skew"
40
- assert_equal 0.55, @contig.at_skew.round(2), "at skew"
61
+ assert_equal 0.33, @contig.gc_skew.round(2), "gc skew"
62
+ assert_equal -0.09, @contig.at_skew.round(2), "at skew"
41
63
  end
42
64
 
43
- should "know its own CpG count and density" do
44
- assert_equal 3, @contig.cpg_count, "cpg count"
45
- assert_equal 66.67, @contig.cpg_ratio.round(2), "cpg ratio"
65
+ should "know its own CpG count and ratio" do
66
+ assert_equal 5, @contig.cpg_count, "cpg count"
67
+ assert_equal 5.56, @contig.cpg_ratio.round(2), "cpg ratio"
68
+ end
69
+
70
+ should "calculate the CpG ratio" do
71
+ seq = Bio::FastaFormat.new ">test\nAAACGAAA"
72
+ contig = Transrate::Contig.new seq
73
+ assert_equal 8, contig.cpg_ratio, "cpg ratio"
46
74
  end
47
75
 
48
76
  should "know the length of its own longest orf" do
49
77
  assert_equal 6, @contig.orf_length, "orf length"
50
78
  end
51
79
 
80
+ should "not break when there is a null byte in the sequence" do
81
+ seq = Bio::FastaFormat.new ">test\nAAAAAAAAAAAA\0AAAAAAAAAAA"
82
+ contig = Transrate::Contig.new seq
83
+ assert_equal 7, contig.orf_length, "orf length"
84
+ end
85
+
86
+ should "not fail on bases that aren't ACGTN" do
87
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
88
+ contig = Transrate::Contig.new seq
89
+ assert_equal 1, contig.base_composition[:n]
90
+ end
91
+
92
+ should "get kmer count with non ACGTN bases" do
93
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
94
+ contig = Transrate::Contig.new seq
95
+ assert_equal 0, contig.kmer_count(6, "RRRRRRRRRRRRRRRR")
96
+ end
97
+
98
+ should "calculate linguistic complexity for a long sequence" do
99
+ alphabet = ["A", "C", "G", "T"]
100
+ seq = ""
101
+ 50000.times do
102
+ seq << alphabet.sample
103
+ end
104
+ seq = Bio::FastaFormat.new ">test\n"+seq
105
+ contig = Transrate::Contig.new seq
106
+ assert contig.linguistic_complexity(6) > 0.98, "linguistic complexity"
107
+ end
52
108
 
53
109
  should "know its own linguistic complexity" do
54
110
  assert_equal 0.0586, @contig.linguistic_complexity(4).round(4),
@@ -33,12 +33,12 @@ class TestContigMetrics < Test::Unit::TestCase
33
33
 
34
34
  should "get CpG density" do
35
35
  @contig_metrics.run
36
- assert_equal 0.52828, @contig_metrics.cpg_ratio.round(5)
36
+ assert_equal 1.51939, @contig_metrics.cpg_ratio.round(5)
37
37
  end
38
38
 
39
39
  should "get linguistic complexity" do
40
40
  @contig_metrics.run
41
- assert_equal 0.26599, @contig_metrics.linguistic_complexity.round(5)
41
+ assert_equal 0.26526, @contig_metrics.linguistic_complexity.round(5)
42
42
  end
43
43
 
44
44
  should "get the number and proportion of Ns" do
data/test/test_inline.rb CHANGED
@@ -14,7 +14,7 @@ class TestInline < Test::Unit::TestCase
14
14
 
15
15
  should 'find longest orf in file' do
16
16
  orfs = []
17
- @a.assembly.each do |contig|
17
+ @a.assembly.each_value do |contig|
18
18
  orfs << contig.orf_length
19
19
  end
20
20
  assert_equal 4, orfs.length
@@ -22,7 +22,7 @@ class TestInline < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  should 'find longest orf in sequence' do
25
- seq = Bio::Sequence.new 'ATGCCCCTAGGGTAG'
25
+ seq = Bio::FastaFormat.new ">test\nATGCCCCTAGGGTAG"
26
26
  contig = Transrate::Contig.new seq
27
27
  assert_equal 4, contig.orf_length
28
28
  end
@@ -59,7 +59,7 @@ class TestTransrater < Test::Unit::TestCase
59
59
  Dir.chdir tmpdir do
60
60
  all = @rater.all_metrics(@left, @right)
61
61
  score = @rater.assembly_score
62
- assert_equal 0.23282, score.round(5)
62
+ assert_equal 0.55815, score.round(5) # regression test
63
63
  end
64
64
  end
65
65
  end
data/transrate.gemspec CHANGED
@@ -14,20 +14,21 @@ Gem::Specification.new do |gem|
14
14
 
15
15
  gem.files = `git ls-files`.split("\n")
16
16
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
- gem.require_paths = %w( lib )
17
+ gem.require_paths = %w( lib ext )
18
+ gem.extensions = ["ext/transrate/extconf.rb"]
18
19
 
19
20
  gem.add_dependency 'yell', '~> 2.0', '>= 2.0.4'
20
21
  gem.add_dependency 'trollop', '~> 2.0'
21
- gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.7'
22
+ gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.8'
22
23
  gem.add_dependency 'which', '~> 0.0', '>= 0.0.2'
23
24
  gem.add_dependency 'bio', '~> 1.4', '>= 1.4.3'
24
25
  gem.add_dependency 'bio-samtools', '~> 2.0', '>= 2.0.5'
25
- gem.add_dependency 'RubyInline', '~> 3.12'
26
- gem.add_dependency 'crb-blast', '~> 0.2'
26
+ gem.add_dependency 'crb-blast', '~> 0.4', '>= 0.4.0'
27
27
  gem.add_dependency 'bettersam', '~> 0.0', '>= 0.0.3'
28
28
  gem.add_dependency 'ruby-prof', '~> 0.15', '>= 0.15.1'
29
29
 
30
30
  gem.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
31
+ gem.add_development_dependency 'rake-compiler', '~> 0.9', '>= 0.9.2'
31
32
  gem.add_development_dependency 'turn', '~> 0.9', '>= 0.9.7'
32
33
  gem.add_development_dependency 'minitest', '~> 4', '>= 4.7.5'
33
34
  gem.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: transrate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-08 00:00:00.000000000 Z
12
+ date: 2014-07-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yell
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0.0'
55
55
  - - '>='
56
56
  - !ruby/object:Gem::Version
57
- version: 0.0.7
57
+ version: 0.0.8
58
58
  type: :runtime
59
59
  prerelease: false
60
60
  version_requirements: !ruby/object:Gem::Requirement
@@ -64,7 +64,7 @@ dependencies:
64
64
  version: '0.0'
65
65
  - - '>='
66
66
  - !ruby/object:Gem::Version
67
- version: 0.0.7
67
+ version: 0.0.8
68
68
  - !ruby/object:Gem::Dependency
69
69
  name: which
70
70
  requirement: !ruby/object:Gem::Requirement
@@ -125,34 +125,26 @@ dependencies:
125
125
  - - '>='
126
126
  - !ruby/object:Gem::Version
127
127
  version: 2.0.5
128
- - !ruby/object:Gem::Dependency
129
- name: RubyInline
130
- requirement: !ruby/object:Gem::Requirement
131
- requirements:
132
- - - ~>
133
- - !ruby/object:Gem::Version
134
- version: '3.12'
135
- type: :runtime
136
- prerelease: false
137
- version_requirements: !ruby/object:Gem::Requirement
138
- requirements:
139
- - - ~>
140
- - !ruby/object:Gem::Version
141
- version: '3.12'
142
128
  - !ruby/object:Gem::Dependency
143
129
  name: crb-blast
144
130
  requirement: !ruby/object:Gem::Requirement
145
131
  requirements:
146
132
  - - ~>
147
133
  - !ruby/object:Gem::Version
148
- version: '0.2'
134
+ version: '0.4'
135
+ - - '>='
136
+ - !ruby/object:Gem::Version
137
+ version: 0.4.0
149
138
  type: :runtime
150
139
  prerelease: false
151
140
  version_requirements: !ruby/object:Gem::Requirement
152
141
  requirements:
153
142
  - - ~>
154
143
  - !ruby/object:Gem::Version
155
- version: '0.2'
144
+ version: '0.4'
145
+ - - '>='
146
+ - !ruby/object:Gem::Version
147
+ version: 0.4.0
156
148
  - !ruby/object:Gem::Dependency
157
149
  name: bettersam
158
150
  requirement: !ruby/object:Gem::Requirement
@@ -213,6 +205,26 @@ dependencies:
213
205
  - - '>='
214
206
  - !ruby/object:Gem::Version
215
207
  version: 10.3.2
208
+ - !ruby/object:Gem::Dependency
209
+ name: rake-compiler
210
+ requirement: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - ~>
213
+ - !ruby/object:Gem::Version
214
+ version: '0.9'
215
+ - - '>='
216
+ - !ruby/object:Gem::Version
217
+ version: 0.9.2
218
+ type: :development
219
+ prerelease: false
220
+ version_requirements: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ~>
223
+ - !ruby/object:Gem::Version
224
+ version: '0.9'
225
+ - - '>='
226
+ - !ruby/object:Gem::Version
227
+ version: 0.9.2
216
228
  - !ruby/object:Gem::Dependency
217
229
  name: turn
218
230
  requirement: !ruby/object:Gem::Requirement
@@ -312,7 +324,8 @@ description: ' a library and command-line tool for quality assessment of de-novo
312
324
  email: rds45@cam.ac.uk
313
325
  executables:
314
326
  - transrate
315
- extensions: []
327
+ extensions:
328
+ - ext/transrate/extconf.rb
316
329
  extra_rdoc_files: []
317
330
  files:
318
331
  - .gitignore
@@ -323,6 +336,9 @@ files:
323
336
  - Rakefile
324
337
  - bin/transrate
325
338
  - deps/deps.yaml
339
+ - docs/transrate_logo_full.png
340
+ - ext/transrate/extconf.rb
341
+ - ext/transrate/transrate.c
326
342
  - lib/transrate.rb
327
343
  - lib/transrate/assembly.rb
328
344
  - lib/transrate/bowtie2.rb
@@ -348,6 +364,7 @@ files:
348
364
  - test/data/sorghum_transcript.fa
349
365
  - test/data/tiny.sam
350
366
  - test/helper.rb
367
+ - test/test_bin.rb
351
368
  - test/test_bowtie.rb
352
369
  - test/test_cmd.rb
353
370
  - test/test_comp_metrics.rb
@@ -367,6 +384,7 @@ post_install_message:
367
384
  rdoc_options: []
368
385
  require_paths:
369
386
  - lib
387
+ - ext
370
388
  required_ruby_version: !ruby/object:Gem::Requirement
371
389
  requirements:
372
390
  - - '>='
@@ -379,7 +397,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
379
397
  version: '0'
380
398
  requirements: []
381
399
  rubyforge_project:
382
- rubygems_version: 2.0.6
400
+ rubygems_version: 2.1.4
383
401
  signing_key:
384
402
  specification_version: 4
385
403
  summary: quality assessment of de-novo transcriptome assemblies