transrate 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +2 -0
- data/.travis.yml +1 -0
- data/LICENSE +2 -15
- data/README.md +14 -132
- data/Rakefile +19 -2
- data/bin/transrate +49 -10
- data/deps/deps.yaml +0 -10
- data/docs/transrate_logo_full.png +0 -0
- data/ext/transrate/extconf.rb +13 -0
- data/ext/transrate/transrate.c +223 -0
- data/lib/transrate.rb +1 -0
- data/lib/transrate/assembly.rb +12 -10
- data/lib/transrate/bowtie2.rb +7 -0
- data/lib/transrate/comparative_metrics.rb +103 -73
- data/lib/transrate/contig.rb +94 -93
- data/lib/transrate/contig_metrics.rb +1 -2
- data/lib/transrate/read_metrics.rb +13 -7
- data/lib/transrate/version.rb +1 -1
- data/test/helper.rb +1 -31
- data/test/test_bin.rb +99 -0
- data/test/test_bowtie.rb +12 -0
- data/test/test_comp_metrics.rb +161 -104
- data/test/test_contig.rb +62 -6
- data/test/test_contig_metrics.rb +2 -2
- data/test/test_inline.rb +2 -2
- data/test/test_transrater.rb +1 -1
- data/transrate.gemspec +5 -4
- metadata +40 -22
    
        data/test/test_contig.rb
    CHANGED
    
    | @@ -1,12 +1,13 @@ | |
| 1 1 | 
             
            require 'helper'
         | 
| 2 2 | 
             
            require 'bio'
         | 
| 3 | 
            +
            require 'benchmark'
         | 
| 3 4 |  | 
| 4 5 | 
             
            class TestContig < Test::Unit::TestCase
         | 
| 5 6 |  | 
| 6 7 | 
             
              context "Contig" do
         | 
| 7 8 |  | 
| 8 9 | 
             
                setup do
         | 
| 9 | 
            -
                  seq = Bio:: | 
| 10 | 
            +
                  seq = Bio::FastaFormat.new ">test\nATGCGTGTATATACGCGTAG" # cg=3, gc=2, c*g=20
         | 
| 10 11 | 
             
                  @contig = Transrate::Contig.new seq
         | 
| 11 12 | 
             
                end
         | 
| 12 13 |  | 
| @@ -23,6 +24,27 @@ class TestContig < Test::Unit::TestCase | |
| 23 24 | 
             
                  assert_equal 0.0, @contig.prop_n, "proportion of base n"
         | 
| 24 25 | 
             
                end
         | 
| 25 26 |  | 
| 27 | 
            +
                should "calculate dibase composition with ambiguous bases" do
         | 
| 28 | 
            +
                  seq = Bio::FastaFormat.new ">test\nATGGGNCRYTAG"
         | 
| 29 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 30 | 
            +
                  assert_equal 1, contig.dibase_composition[:at]
         | 
| 31 | 
            +
                  assert_equal 1, contig.dibase_composition[:nn]
         | 
| 32 | 
            +
                  assert_equal 1, contig.dibase_composition[:gn]
         | 
| 33 | 
            +
                end
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                should "benchmark composition" do
         | 
| 36 | 
            +
                  seq = "GCCGTGAGCTTCTTGATCGAGTTCTTCTCCCGCTTCGCGAACGCCTTGGACTCCTNGCACGGG"
         | 
| 37 | 
            +
                  seq << "GTCAGCCCCGCGATGTCGGCGGCCGCGGGCGGGGGG"
         | 
| 38 | 
            +
                  seq = seq * 100000
         | 
| 39 | 
            +
                  seq = Bio::FastaFormat.new ">test\n"+seq
         | 
| 40 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 41 | 
            +
                  ruby_time = 11 # time taken with the ruby version
         | 
| 42 | 
            +
                  c_time = Benchmark.realtime do |x|
         | 
| 43 | 
            +
                    contig.dibase_composition
         | 
| 44 | 
            +
                  end
         | 
| 45 | 
            +
                  assert c_time*100 < ruby_time, "c faster than ruby"
         | 
| 46 | 
            +
                end
         | 
| 47 | 
            +
             | 
| 26 48 | 
             
                should "know how many of each two-base pair it contains" do
         | 
| 27 49 | 
             
                  assert_equal 3, @contig.dibase_composition[:cg], "cg count"
         | 
| 28 50 | 
             
                  assert_equal 3, @contig.dibase_composition[:at], "at count"
         | 
| @@ -36,19 +58,53 @@ class TestContig < Test::Unit::TestCase | |
| 36 58 | 
             
                end
         | 
| 37 59 |  | 
| 38 60 | 
             
                should "know its own base-pair skew" do
         | 
| 39 | 
            -
                  assert_equal 0. | 
| 40 | 
            -
                  assert_equal 0. | 
| 61 | 
            +
                  assert_equal 0.33, @contig.gc_skew.round(2), "gc skew"
         | 
| 62 | 
            +
                  assert_equal -0.09, @contig.at_skew.round(2), "at skew"
         | 
| 41 63 | 
             
                end
         | 
| 42 64 |  | 
| 43 | 
            -
                should "know its own CpG count and  | 
| 44 | 
            -
                  assert_equal  | 
| 45 | 
            -
                  assert_equal  | 
| 65 | 
            +
                should "know its own CpG count and ratio" do
         | 
| 66 | 
            +
                  assert_equal 5, @contig.cpg_count, "cpg count"
         | 
| 67 | 
            +
                  assert_equal 5.56, @contig.cpg_ratio.round(2), "cpg ratio"
         | 
| 68 | 
            +
                end
         | 
| 69 | 
            +
             | 
| 70 | 
            +
                should "calculate the CpG ratio" do
         | 
| 71 | 
            +
                  seq = Bio::FastaFormat.new ">test\nAAACGAAA"
         | 
| 72 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 73 | 
            +
                  assert_equal 8, contig.cpg_ratio, "cpg ratio"
         | 
| 46 74 | 
             
                end
         | 
| 47 75 |  | 
| 48 76 | 
             
                should "know the length of its own longest orf" do
         | 
| 49 77 | 
             
                  assert_equal 6, @contig.orf_length, "orf length"
         | 
| 50 78 | 
             
                end
         | 
| 51 79 |  | 
| 80 | 
            +
                should "not break when there is a null byte in the sequence" do
         | 
| 81 | 
            +
                  seq = Bio::FastaFormat.new ">test\nAAAAAAAAAAAA\0AAAAAAAAAAA"
         | 
| 82 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 83 | 
            +
                  assert_equal 7, contig.orf_length, "orf length"
         | 
| 84 | 
            +
                end
         | 
| 85 | 
            +
             | 
| 86 | 
            +
                should "not fail on bases that aren't ACGTN" do
         | 
| 87 | 
            +
                  seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
         | 
| 88 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 89 | 
            +
                  assert_equal 1, contig.base_composition[:n]
         | 
| 90 | 
            +
                end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                should "get kmer count with non ACGTN bases" do
         | 
| 93 | 
            +
                  seq = Bio::FastaFormat.new ">test\nATGCGTGTARATACGCGTAG"
         | 
| 94 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 95 | 
            +
                  assert_equal 0, contig.kmer_count(6, "RRRRRRRRRRRRRRRR")
         | 
| 96 | 
            +
                end
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                should "calculate linguistic complexity for a long sequence" do
         | 
| 99 | 
            +
                  alphabet = ["A", "C", "G", "T"]
         | 
| 100 | 
            +
                  seq = ""
         | 
| 101 | 
            +
                  50000.times do
         | 
| 102 | 
            +
                    seq << alphabet.sample
         | 
| 103 | 
            +
                  end
         | 
| 104 | 
            +
                  seq = Bio::FastaFormat.new ">test\n"+seq
         | 
| 105 | 
            +
                  contig = Transrate::Contig.new seq
         | 
| 106 | 
            +
                  assert contig.linguistic_complexity(6) > 0.98, "linguistic complexity"
         | 
| 107 | 
            +
                end
         | 
| 52 108 |  | 
| 53 109 | 
             
                should "know its own linguistic complexity" do
         | 
| 54 110 | 
             
                  assert_equal 0.0586, @contig.linguistic_complexity(4).round(4),
         | 
    
        data/test/test_contig_metrics.rb
    CHANGED
    
    | @@ -33,12 +33,12 @@ class TestContigMetrics < Test::Unit::TestCase | |
| 33 33 |  | 
| 34 34 | 
             
                should "get CpG density" do
         | 
| 35 35 | 
             
                  @contig_metrics.run
         | 
| 36 | 
            -
                  assert_equal  | 
| 36 | 
            +
                  assert_equal 1.51939, @contig_metrics.cpg_ratio.round(5)
         | 
| 37 37 | 
             
                end
         | 
| 38 38 |  | 
| 39 39 | 
             
                should "get linguistic complexity" do
         | 
| 40 40 | 
             
                  @contig_metrics.run
         | 
| 41 | 
            -
                  assert_equal 0. | 
| 41 | 
            +
                  assert_equal 0.26526, @contig_metrics.linguistic_complexity.round(5)
         | 
| 42 42 | 
             
                end
         | 
| 43 43 |  | 
| 44 44 | 
             
                should "get the number and proportion of Ns" do
         | 
    
        data/test/test_inline.rb
    CHANGED
    
    | @@ -14,7 +14,7 @@ class TestInline < Test::Unit::TestCase | |
| 14 14 |  | 
| 15 15 | 
             
                should 'find longest orf in file' do
         | 
| 16 16 | 
             
                  orfs = []
         | 
| 17 | 
            -
                  @a.assembly. | 
| 17 | 
            +
                  @a.assembly.each_value do |contig|
         | 
| 18 18 | 
             
                    orfs << contig.orf_length
         | 
| 19 19 | 
             
                  end
         | 
| 20 20 | 
             
                  assert_equal 4, orfs.length
         | 
| @@ -22,7 +22,7 @@ class TestInline < Test::Unit::TestCase | |
| 22 22 | 
             
                end
         | 
| 23 23 |  | 
| 24 24 | 
             
                should 'find longest orf in sequence' do
         | 
| 25 | 
            -
                  seq = Bio:: | 
| 25 | 
            +
                  seq = Bio::FastaFormat.new ">test\nATGCCCCTAGGGTAG"
         | 
| 26 26 | 
             
                  contig = Transrate::Contig.new seq
         | 
| 27 27 | 
             
                  assert_equal 4, contig.orf_length
         | 
| 28 28 | 
             
                end
         | 
    
        data/test/test_transrater.rb
    CHANGED
    
    | @@ -59,7 +59,7 @@ class TestTransrater < Test::Unit::TestCase | |
| 59 59 | 
             
                    Dir.chdir tmpdir do
         | 
| 60 60 | 
             
                      all = @rater.all_metrics(@left, @right)
         | 
| 61 61 | 
             
                      score = @rater.assembly_score
         | 
| 62 | 
            -
                      assert_equal 0. | 
| 62 | 
            +
                      assert_equal 0.55815, score.round(5) # regression test
         | 
| 63 63 | 
             
                    end
         | 
| 64 64 | 
             
                  end
         | 
| 65 65 | 
             
                end
         | 
    
        data/transrate.gemspec
    CHANGED
    
    | @@ -14,20 +14,21 @@ Gem::Specification.new do |gem| | |
| 14 14 |  | 
| 15 15 | 
             
              gem.files = `git ls-files`.split("\n")
         | 
| 16 16 | 
             
              gem.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
         | 
| 17 | 
            -
              gem.require_paths = %w( lib )
         | 
| 17 | 
            +
              gem.require_paths = %w( lib ext )
         | 
| 18 | 
            +
              gem.extensions  = ["ext/transrate/extconf.rb"]
         | 
| 18 19 |  | 
| 19 20 | 
             
              gem.add_dependency 'yell', '~> 2.0', '>= 2.0.4'
         | 
| 20 21 | 
             
              gem.add_dependency 'trollop', '~> 2.0'
         | 
| 21 | 
            -
              gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0. | 
| 22 | 
            +
              gem.add_dependency 'bindeps', '~> 0.0', '>= 0.0.8'
         | 
| 22 23 | 
             
              gem.add_dependency 'which', '~> 0.0', '>= 0.0.2'
         | 
| 23 24 | 
             
              gem.add_dependency 'bio', '~> 1.4', '>= 1.4.3'
         | 
| 24 25 | 
             
              gem.add_dependency 'bio-samtools', '~> 2.0', '>= 2.0.5'
         | 
| 25 | 
            -
              gem.add_dependency ' | 
| 26 | 
            -
              gem.add_dependency 'crb-blast', '~> 0.2'
         | 
| 26 | 
            +
              gem.add_dependency 'crb-blast', '~> 0.4', '>= 0.4.0'
         | 
| 27 27 | 
             
              gem.add_dependency 'bettersam', '~> 0.0', '>= 0.0.3'
         | 
| 28 28 | 
             
              gem.add_dependency 'ruby-prof', '~> 0.15', '>= 0.15.1'
         | 
| 29 29 |  | 
| 30 30 | 
             
              gem.add_development_dependency 'rake', '~> 10.3', '>= 10.3.2'
         | 
| 31 | 
            +
              gem.add_development_dependency 'rake-compiler', '~> 0.9', '>= 0.9.2'
         | 
| 31 32 | 
             
              gem.add_development_dependency 'turn', '~> 0.9', '>= 0.9.7'
         | 
| 32 33 | 
             
              gem.add_development_dependency 'minitest', '~> 4', '>= 4.7.5'
         | 
| 33 34 | 
             
              gem.add_development_dependency 'simplecov', '~> 0.8', '>= 0.8.2'
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: transrate
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.3.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Richard Smith-Unna
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2014-07- | 
| 12 | 
            +
            date: 2014-07-25 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies:
         | 
| 14 14 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 15 15 | 
             
              name: yell
         | 
| @@ -54,7 +54,7 @@ dependencies: | |
| 54 54 | 
             
                    version: '0.0'
         | 
| 55 55 | 
             
                - - '>='
         | 
| 56 56 | 
             
                  - !ruby/object:Gem::Version
         | 
| 57 | 
            -
                    version: 0.0. | 
| 57 | 
            +
                    version: 0.0.8
         | 
| 58 58 | 
             
              type: :runtime
         | 
| 59 59 | 
             
              prerelease: false
         | 
| 60 60 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| @@ -64,7 +64,7 @@ dependencies: | |
| 64 64 | 
             
                    version: '0.0'
         | 
| 65 65 | 
             
                - - '>='
         | 
| 66 66 | 
             
                  - !ruby/object:Gem::Version
         | 
| 67 | 
            -
                    version: 0.0. | 
| 67 | 
            +
                    version: 0.0.8
         | 
| 68 68 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 69 69 | 
             
              name: which
         | 
| 70 70 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -125,34 +125,26 @@ dependencies: | |
| 125 125 | 
             
                - - '>='
         | 
| 126 126 | 
             
                  - !ruby/object:Gem::Version
         | 
| 127 127 | 
             
                    version: 2.0.5
         | 
| 128 | 
            -
            - !ruby/object:Gem::Dependency
         | 
| 129 | 
            -
              name: RubyInline
         | 
| 130 | 
            -
              requirement: !ruby/object:Gem::Requirement
         | 
| 131 | 
            -
                requirements:
         | 
| 132 | 
            -
                - - ~>
         | 
| 133 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 134 | 
            -
                    version: '3.12'
         | 
| 135 | 
            -
              type: :runtime
         | 
| 136 | 
            -
              prerelease: false
         | 
| 137 | 
            -
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 138 | 
            -
                requirements:
         | 
| 139 | 
            -
                - - ~>
         | 
| 140 | 
            -
                  - !ruby/object:Gem::Version
         | 
| 141 | 
            -
                    version: '3.12'
         | 
| 142 128 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 143 129 | 
             
              name: crb-blast
         | 
| 144 130 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 145 131 | 
             
                requirements:
         | 
| 146 132 | 
             
                - - ~>
         | 
| 147 133 | 
             
                  - !ruby/object:Gem::Version
         | 
| 148 | 
            -
                    version: '0. | 
| 134 | 
            +
                    version: '0.4'
         | 
| 135 | 
            +
                - - '>='
         | 
| 136 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 137 | 
            +
                    version: 0.4.0
         | 
| 149 138 | 
             
              type: :runtime
         | 
| 150 139 | 
             
              prerelease: false
         | 
| 151 140 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 152 141 | 
             
                requirements:
         | 
| 153 142 | 
             
                - - ~>
         | 
| 154 143 | 
             
                  - !ruby/object:Gem::Version
         | 
| 155 | 
            -
                    version: '0. | 
| 144 | 
            +
                    version: '0.4'
         | 
| 145 | 
            +
                - - '>='
         | 
| 146 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 147 | 
            +
                    version: 0.4.0
         | 
| 156 148 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 157 149 | 
             
              name: bettersam
         | 
| 158 150 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -213,6 +205,26 @@ dependencies: | |
| 213 205 | 
             
                - - '>='
         | 
| 214 206 | 
             
                  - !ruby/object:Gem::Version
         | 
| 215 207 | 
             
                    version: 10.3.2
         | 
| 208 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 209 | 
            +
              name: rake-compiler
         | 
| 210 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 211 | 
            +
                requirements:
         | 
| 212 | 
            +
                - - ~>
         | 
| 213 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 214 | 
            +
                    version: '0.9'
         | 
| 215 | 
            +
                - - '>='
         | 
| 216 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 217 | 
            +
                    version: 0.9.2
         | 
| 218 | 
            +
              type: :development
         | 
| 219 | 
            +
              prerelease: false
         | 
| 220 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 221 | 
            +
                requirements:
         | 
| 222 | 
            +
                - - ~>
         | 
| 223 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 224 | 
            +
                    version: '0.9'
         | 
| 225 | 
            +
                - - '>='
         | 
| 226 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 227 | 
            +
                    version: 0.9.2
         | 
| 216 228 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 217 229 | 
             
              name: turn
         | 
| 218 230 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -312,7 +324,8 @@ description: ' a library and command-line tool for quality assessment of de-novo | |
| 312 324 | 
             
            email: rds45@cam.ac.uk
         | 
| 313 325 | 
             
            executables:
         | 
| 314 326 | 
             
            - transrate
         | 
| 315 | 
            -
            extensions: | 
| 327 | 
            +
            extensions:
         | 
| 328 | 
            +
            - ext/transrate/extconf.rb
         | 
| 316 329 | 
             
            extra_rdoc_files: []
         | 
| 317 330 | 
             
            files:
         | 
| 318 331 | 
             
            - .gitignore
         | 
| @@ -323,6 +336,9 @@ files: | |
| 323 336 | 
             
            - Rakefile
         | 
| 324 337 | 
             
            - bin/transrate
         | 
| 325 338 | 
             
            - deps/deps.yaml
         | 
| 339 | 
            +
            - docs/transrate_logo_full.png
         | 
| 340 | 
            +
            - ext/transrate/extconf.rb
         | 
| 341 | 
            +
            - ext/transrate/transrate.c
         | 
| 326 342 | 
             
            - lib/transrate.rb
         | 
| 327 343 | 
             
            - lib/transrate/assembly.rb
         | 
| 328 344 | 
             
            - lib/transrate/bowtie2.rb
         | 
| @@ -348,6 +364,7 @@ files: | |
| 348 364 | 
             
            - test/data/sorghum_transcript.fa
         | 
| 349 365 | 
             
            - test/data/tiny.sam
         | 
| 350 366 | 
             
            - test/helper.rb
         | 
| 367 | 
            +
            - test/test_bin.rb
         | 
| 351 368 | 
             
            - test/test_bowtie.rb
         | 
| 352 369 | 
             
            - test/test_cmd.rb
         | 
| 353 370 | 
             
            - test/test_comp_metrics.rb
         | 
| @@ -367,6 +384,7 @@ post_install_message: | |
| 367 384 | 
             
            rdoc_options: []
         | 
| 368 385 | 
             
            require_paths:
         | 
| 369 386 | 
             
            - lib
         | 
| 387 | 
            +
            - ext
         | 
| 370 388 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 371 389 | 
             
              requirements:
         | 
| 372 390 | 
             
              - - '>='
         | 
| @@ -379,7 +397,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 379 397 | 
             
                  version: '0'
         | 
| 380 398 | 
             
            requirements: []
         | 
| 381 399 | 
             
            rubyforge_project: 
         | 
| 382 | 
            -
            rubygems_version: 2. | 
| 400 | 
            +
            rubygems_version: 2.1.4
         | 
| 383 401 | 
             
            signing_key: 
         | 
| 384 402 | 
             
            specification_version: 4
         | 
| 385 403 | 
             
            summary: quality assessment of de-novo transcriptome assemblies
         |