transrate 0.2.0 → 0.3.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between those versions as they appear in their respective public registries.
data/test/test_contig.rb CHANGED
@@ -1,12 +1,13 @@
1
1
  require 'helper'
2
2
  require 'bio'
3
+ require 'benchmark'
3
4
 
4
5
  class TestContig < Test::Unit::TestCase
5
6
 
6
7
  context "Contig" do
7
8
 
8
9
  setup do
9
- seq = Bio::Sequence.new 'ATGCGTGTATATACGCGTAG'
10
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTATATACGCGTAG" # cg=3, gc=2, c*g=20
10
11
  @contig = Transrate::Contig.new seq
11
12
  end
12
13
 
@@ -23,6 +24,27 @@ class TestContig < Test::Unit::TestCase
23
24
  assert_equal 0.0, @contig.prop_n, "proportion of base n"
24
25
  end
25
26
 
27
+ should "calculate dibase composition with ambiguous bases" do
28
+ seq = Bio::FastaFormat.new ">test\nATGGGNCRYTAG"
29
+ contig = Transrate::Contig.new seq
30
+ assert_equal 1, contig.dibase_composition[:at]
31
+ assert_equal 1, contig.dibase_composition[:nn]
32
+ assert_equal 1, contig.dibase_composition[:gn]
33
+ end
34
+
35
+ should "benchmark composition" do
36
+ seq = "GCCGTGAGCTTCTTGATCGAGTTCTTCTCCCGCTTCGCGAACGCCTTGGACTCCTNGCACGGG"
37
+ seq << "GTCAGCCCCGCGATGTCGGCGGCCGCGGGCGGGGGG"
38
+ seq = seq * 100000
39
+ seq = Bio::FastaFormat.new ">test\n"+seq
40
+ contig = Transrate::Contig.new seq
41
+ ruby_time = 11 # time taken with the ruby version
42
+ c_time = Benchmark.realtime do |x|
43
+ contig.dibase_composition
44
+ end
45
+ assert c_time*100 < ruby_time, "c faster than ruby"
46
+ end
47
+
26
48
  should "know how many of each two-base pair it contains" do
27
49
  assert_equal 3, @contig.dibase_composition[:cg], "cg count"
28
50
  assert_equal 3, @contig.dibase_composition[:at], "at count"
@@ -36,19 +58,53 @@ class TestContig < Test::Unit::TestCase
36
58
  end
37
59
 
38
60
  should "know its own base-pair skew" do
39
- assert_equal 0.45, @contig.gc_skew.round(2), "gc skew"
40
- assert_equal 0.55, @contig.at_skew.round(2), "at skew"
61
+ assert_equal 0.33, @contig.gc_skew.round(2), "gc skew"
62
+ assert_equal -0.09, @contig.at_skew.round(2), "at skew"
41
63
  end
42
64
 
43
- should "know its own CpG count and density" do
44
- assert_equal 3, @contig.cpg_count, "cpg count"
45
- assert_equal 66.67, @contig.cpg_ratio.round(2), "cpg ratio"
65
+ should "know its own CpG count and ratio" do
66
+ assert_equal 5, @contig.cpg_count, "cpg count"
67
+ assert_equal 5.56, @contig.cpg_ratio.round(2), "cpg ratio"
68
+ end
69
+
70
+ should "calculate the CpG ratio" do
71
+ seq = Bio::FastaFormat.new ">test\nAAACGAAA"
72
+ contig = Transrate::Contig.new seq
73
+ assert_equal 8, contig.cpg_ratio, "cpg ratio"
46
74
  end
47
75
 
48
76
  should "know the length of its own longest orf" do
49
77
  assert_equal 6, @contig.orf_length, "orf length"
50
78
  end
51
79
 
80
+ should "not break when there is a null byte in the sequence" do
81
+ seq = Bio::FastaFormat.new ">test\nAAAAAAAAAAAA\0AAAAAAAAAAA"
82
+ contig = Transrate::Contig.new seq
83
+ assert_equal 7, contig.orf_length, "orf length"
84
+ end
85
+
86
+ should "not fail on bases that aren't ACGTN" do
87
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
88
+ contig = Transrate::Contig.new seq
89
+ assert_equal 1, contig.base_composition[:n]
90
+ end
91
+
92
+ should "get kmer count with non ACGTN bases" do
93
+ seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
94
+ contig = Transrate::Contig.new seq
95
+ assert_equal 0, contig.kmer_count(6, "RRRRRRRRRRRRRRRR")
96
+ end
97
+
98
+ should "calculate linguistic complexity for a long sequence" do
99
+ alphabet = ["A", "C", "G", "T"]
100
+ seq = ""
101
+ 50000.times do
102
+ seq << alphabet.sample
103
+ end
104
+ seq = Bio::FastaFormat.new ">test\n"+seq
105
+ contig = Transrate::Contig.new seq
106
+ assert contig.linguistic_complexity(6) > 0.98, "linguistic complexity"
107
+ end
52
108
 
53
109
  should "know its own linguistic complexity" do
54
110
  assert_equal 0.0586, @contig.linguistic_complexity(4).round(4),
@@ -33,12 +33,12 @@ class TestContigMetrics < Test::Unit::TestCase
33
33
 
34
34
  should "get CpG density" do
35
35
  @contig_metrics.run
36
- assert_equal 0.52828, @contig_metrics.cpg_ratio.round(5)
36
+ assert_equal 1.51939, @contig_metrics.cpg_ratio.round(5)
37
37
  end
38
38
 
39
39
  should "get linguistic complexity" do
40
40
  @contig_metrics.run
41
- assert_equal 0.26599, @contig_metrics.linguistic_complexity.round(5)
41
+ assert_equal 0.26526, @contig_metrics.linguistic_complexity.round(5)
42
42
  end
43
43
 
44
44
  should "get the number and proportion of Ns" do
data/test/test_inline.rb CHANGED
@@ -14,7 +14,7 @@ class TestInline < Test::Unit::TestCase
14
14
 
15
15
  should 'find longest orf in file' do
16
16
  orfs = []
17
- @a.assembly.each do |contig|
17
+ @a.assembly.each_value do |contig|
18
18
  orfs << contig.orf_length
19
19
  end
20
20
  assert_equal 4, orfs.length
@@ -22,7 +22,7 @@ class TestInline < Test::Unit::TestCase
22
22
  end
23
23
 
24
24
  should 'find longest orf in sequence' do
25
- seq = Bio::Sequence.new 'ATGCCCCTAGGGTAG'
25
+ seq = Bio::FastaFormat.new ">test\nATGCCCCTAGGGTAG"
26
26
  contig = Transrate::Contig.new seq
27
27
  assert_equal 4, contig.orf_length
28
28
  end
@@ -59,7 +59,7 @@ class TestTransrater < Test::Unit::TestCase
59
59
  Dir.chdir tmpdir do
60
60
  all = @rater.all_metrics(@left, @right)
61
61
  score = @rater.assembly_score
62
- assert_equal 0.23282, score.round(5)
62
+ assert_equal 0.55815, score.round(5) # regression test
63
63
  end
64
64
  end
65
65
  end
data/transrate.gemspec CHANGED
@@ -14,20 +14,21 @@ Gem::Specification.new do |gem|
14
14
 
15
15
  gem.files = `git ls-files`.split("\n")
16
16
  gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
17
- gem.require_paths = %w( lib )
17
+ gem.require_paths = %w( lib ext )
18
+ gem.extensions = ["ext/transrate/extconf.rb"]
18
19
 
19
20
  gem.add_dependency 'yell', '~> 2.0', '>= 2.0.4'
20
21
  gem.add_dependency 'trollop', '~> 2.0'
21
- gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.7'
22
+ gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.8'
22
23
  gem.add_dependency 'which', '~> 0.0', '>= 0.0.2'
23
24
  gem.add_dependency 'bio', '~> 1.4', '>= 1.4.3'
24
25
  gem.add_dependency 'bio-samtools', '~> 2.0', '>= 2.0.5'
25
- gem.add_dependency 'RubyInline', '~> 3.12'
26
- gem.add_dependency 'crb-blast', '~> 0.2'
26
+ gem.add_dependency 'crb-blast', '~> 0.4', '>= 0.4.0'
27
27
  gem.add_dependency 'bettersam', '~> 0.0', '>= 0.0.3'
28
28
  gem.add_dependency 'ruby-prof', '~> 0.15', '>= 0.15.1'
29
29
 
30
30
  gem.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
31
+ gem.add_development_dependency 'rake-compiler', '~> 0.9', '>= 0.9.2'
31
32
  gem.add_development_dependency 'turn', '~> 0.9', '>= 0.9.7'
32
33
  gem.add_development_dependency 'minitest', '~> 4', '>= 4.7.5'
33
34
  gem.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: transrate
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Richard Smith-Unna
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2014-07-08 00:00:00.000000000 Z
12
+ date: 2014-07-25 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: yell
@@ -54,7 +54,7 @@ dependencies:
54
54
  version: '0.0'
55
55
  - - '>='
56
56
  - !ruby/object:Gem::Version
57
- version: 0.0.7
57
+ version: 0.0.8
58
58
  type: :runtime
59
59
  prerelease: false
60
60
  version_requirements: !ruby/object:Gem::Requirement
@@ -64,7 +64,7 @@ dependencies:
64
64
  version: '0.0'
65
65
  - - '>='
66
66
  - !ruby/object:Gem::Version
67
- version: 0.0.7
67
+ version: 0.0.8
68
68
  - !ruby/object:Gem::Dependency
69
69
  name: which
70
70
  requirement: !ruby/object:Gem::Requirement
@@ -125,34 +125,26 @@ dependencies:
125
125
  - - '>='
126
126
  - !ruby/object:Gem::Version
127
127
  version: 2.0.5
128
- - !ruby/object:Gem::Dependency
129
- name: RubyInline
130
- requirement: !ruby/object:Gem::Requirement
131
- requirements:
132
- - - ~>
133
- - !ruby/object:Gem::Version
134
- version: '3.12'
135
- type: :runtime
136
- prerelease: false
137
- version_requirements: !ruby/object:Gem::Requirement
138
- requirements:
139
- - - ~>
140
- - !ruby/object:Gem::Version
141
- version: '3.12'
142
128
  - !ruby/object:Gem::Dependency
143
129
  name: crb-blast
144
130
  requirement: !ruby/object:Gem::Requirement
145
131
  requirements:
146
132
  - - ~>
147
133
  - !ruby/object:Gem::Version
148
- version: '0.2'
134
+ version: '0.4'
135
+ - - '>='
136
+ - !ruby/object:Gem::Version
137
+ version: 0.4.0
149
138
  type: :runtime
150
139
  prerelease: false
151
140
  version_requirements: !ruby/object:Gem::Requirement
152
141
  requirements:
153
142
  - - ~>
154
143
  - !ruby/object:Gem::Version
155
- version: '0.2'
144
+ version: '0.4'
145
+ - - '>='
146
+ - !ruby/object:Gem::Version
147
+ version: 0.4.0
156
148
  - !ruby/object:Gem::Dependency
157
149
  name: bettersam
158
150
  requirement: !ruby/object:Gem::Requirement
@@ -213,6 +205,26 @@ dependencies:
213
205
  - - '>='
214
206
  - !ruby/object:Gem::Version
215
207
  version: 10.3.2
208
+ - !ruby/object:Gem::Dependency
209
+ name: rake-compiler
210
+ requirement: !ruby/object:Gem::Requirement
211
+ requirements:
212
+ - - ~>
213
+ - !ruby/object:Gem::Version
214
+ version: '0.9'
215
+ - - '>='
216
+ - !ruby/object:Gem::Version
217
+ version: 0.9.2
218
+ type: :development
219
+ prerelease: false
220
+ version_requirements: !ruby/object:Gem::Requirement
221
+ requirements:
222
+ - - ~>
223
+ - !ruby/object:Gem::Version
224
+ version: '0.9'
225
+ - - '>='
226
+ - !ruby/object:Gem::Version
227
+ version: 0.9.2
216
228
  - !ruby/object:Gem::Dependency
217
229
  name: turn
218
230
  requirement: !ruby/object:Gem::Requirement
@@ -312,7 +324,8 @@ description: ' a library and command-line tool for quality assessment of de-novo
312
324
  email: rds45@cam.ac.uk
313
325
  executables:
314
326
  - transrate
315
- extensions: []
327
+ extensions:
328
+ - ext/transrate/extconf.rb
316
329
  extra_rdoc_files: []
317
330
  files:
318
331
  - .gitignore
@@ -323,6 +336,9 @@ files:
323
336
  - Rakefile
324
337
  - bin/transrate
325
338
  - deps/deps.yaml
339
+ - docs/transrate_logo_full.png
340
+ - ext/transrate/extconf.rb
341
+ - ext/transrate/transrate.c
326
342
  - lib/transrate.rb
327
343
  - lib/transrate/assembly.rb
328
344
  - lib/transrate/bowtie2.rb
@@ -348,6 +364,7 @@ files:
348
364
  - test/data/sorghum_transcript.fa
349
365
  - test/data/tiny.sam
350
366
  - test/helper.rb
367
+ - test/test_bin.rb
351
368
  - test/test_bowtie.rb
352
369
  - test/test_cmd.rb
353
370
  - test/test_comp_metrics.rb
@@ -367,6 +384,7 @@ post_install_message:
367
384
  rdoc_options: []
368
385
  require_paths:
369
386
  - lib
387
+ - ext
370
388
  required_ruby_version: !ruby/object:Gem::Requirement
371
389
  requirements:
372
390
  - - '>='
@@ -379,7 +397,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
379
397
  version: '0'
380
398
  requirements: []
381
399
  rubyforge_project:
382
- rubygems_version: 2.0.6
400
+ rubygems_version: 2.1.4
383
401
  signing_key:
384
402
  specification_version: 4
385
403
  summary: quality assessment of de-novo transcriptome assemblies