bio-maf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/.simplecov +1 -0
 - data/.travis.yml +16 -0
 - data/.yardopts +3 -0
 - data/DEVELOPMENT.md +40 -0
 - data/Gemfile +23 -0
 - data/LICENSE.txt +20 -0
 - data/README.md +209 -0
 - data/Rakefile +76 -0
 - data/VERSION +1 -0
 - data/benchmarks/dispatch_bench +53 -0
 - data/benchmarks/iter_bench +44 -0
 - data/benchmarks/read_bench +40 -0
 - data/benchmarks/sort_bench +33 -0
 - data/benchmarks/split_bench +33 -0
 - data/bin/maf_count +82 -0
 - data/bin/maf_dump_blocks +27 -0
 - data/bin/maf_extract_ranges_count +44 -0
 - data/bin/maf_index +88 -0
 - data/bin/maf_parse_bench +94 -0
 - data/bin/maf_to_fasta +68 -0
 - data/bin/maf_write +84 -0
 - data/bin/random_ranges +35 -0
 - data/features/maf-indexing.feature +31 -0
 - data/features/maf-output.feature +29 -0
 - data/features/maf-parsing.feature +44 -0
 - data/features/maf-querying.feature +75 -0
 - data/features/maf-to-fasta.feature +50 -0
 - data/features/step_definitions/convert_steps.rb +45 -0
 - data/features/step_definitions/index_steps.rb +20 -0
 - data/features/step_definitions/output_steps.rb +27 -0
 - data/features/step_definitions/parse_steps.rb +63 -0
 - data/features/step_definitions/query_steps.rb +31 -0
 - data/features/step_definitions/ucsc_bin_steps.rb +14 -0
 - data/features/support/env.rb +16 -0
 - data/features/ucsc-bins.feature +24 -0
 - data/lib/bio/maf/index.rb +620 -0
 - data/lib/bio/maf/parser.rb +888 -0
 - data/lib/bio/maf/struct.rb +63 -0
 - data/lib/bio/maf/writer.rb +63 -0
 - data/lib/bio/maf.rb +4 -0
 - data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
 - data/lib/bio/ucsc/ucsc_bin.rb +117 -0
 - data/lib/bio/ucsc.rb +2 -0
 - data/lib/bio-maf/maf.rb +3 -0
 - data/lib/bio-maf.rb +12 -0
 - data/man/.gitignore +1 -0
 - data/man/maf_index.1 +105 -0
 - data/man/maf_index.1.markdown +97 -0
 - data/man/maf_index.1.ronn +83 -0
 - data/man/maf_to_fasta.1 +53 -0
 - data/man/maf_to_fasta.1.ronn +51 -0
 - data/spec/bio/maf/index_spec.rb +363 -0
 - data/spec/bio/maf/parser_spec.rb +354 -0
 - data/spec/bio/maf/struct_spec.rb +75 -0
 - data/spec/spec_helper.rb +14 -0
 - data/test/data/big-block.maf +15999 -0
 - data/test/data/chr22_ieq.maf +11 -0
 - data/test/data/chrY-1block.maf +6 -0
 - data/test/data/empty +0 -0
 - data/test/data/empty.db +0 -0
 - data/test/data/mm8_chr7_tiny.kct +0 -0
 - data/test/data/mm8_chr7_tiny.maf +76 -0
 - data/test/data/mm8_mod_a.maf +7 -0
 - data/test/data/mm8_single.maf +13 -0
 - data/test/data/mm8_subset_a.maf +23 -0
 - data/test/data/t1-bad1.maf +15 -0
 - data/test/data/t1.fasta +12 -0
 - data/test/data/t1.maf +15 -0
 - data/test/data/t1a.maf +17 -0
 - data/test/helper.rb +18 -0
 - data/test/test_bio-maf.rb +7 -0
 - data/travis-ci/install_kc +13 -0
 - data/travis-ci/install_kc_java +13 -0
 - data/travis-ci/report_errors +4 -0
 - metadata +181 -0
 
| 
         @@ -0,0 +1,363 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'spec_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            module Bio
         
     | 
| 
      
 4 
     | 
    
         
            +
              module MAF
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
                describe KyotoIndex do
         
     | 
| 
      
 7 
     | 
    
         
            +
                  # Reports whether the index database contains at least +n+ keys
                  # that begin with the byte string +start+.
                  #
                  # The cursor is positioned with +jump(start)+ and a fresh key is
                  # read on every iteration. Previously the key was read only once,
                  # before the loop, so the helper answered true whenever the first
                  # key matched, no matter how many matching keys existed.
                  #
                  # n     - minimum number of matching keys required.
                  # start - key prefix to look for.
                  #
                  # Returns true when +n+ matching keys were seen, false otherwise.
                  def has_at_least_n_with_prefix(n, start)
                    found = 0
                    @idx.db.cursor_process do |cur|
                      cur.jump(start)
                      while found < n
                        # get_key(true) is expected to return the current key and
                        # step the cursor forward (Kyoto Cabinet cursor API).
                        k = cur.get_key(true)
                        break unless k && k.start_with?(start)
                        found += 1
                      end
                    end
                    found == n
                  end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
                  # Specs for KyotoIndex.build: building an index from a parsed MAF
                  # file into an in-memory ('%') database.
                  describe ".build" do
                    it "accepts '%' as a path for an in-memory DB" do
                      expect {
                        @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                        @idx = KyotoIndex.build(@p, '%')
                        @p.f.close
                        @idx.close
                      }.not_to raise_error
                    end
                    # Pending examples (no body yet).
                    it "accepts .kct paths"
                    it "rejects other paths"
                    context "mm8_chr7" do
                      before(:each) do
                        @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                        @idx = KyotoIndex.build(@p, '%')
                      end
                      it "uses the first sequence appearing as the reference sequence" do
                        @idx.index_sequences.to_a.should == [["mm8.chr7", 0]]
                      end
                      it "creates 8 index entries" do
                        has_at_least_n_with_prefix(8, "\xFF\x00").should be_true
                      end
                      # Renamed: this example used to share its description with
                      # the next one, making failures ambiguous in reports.
                      it "stores exactly one sequence ID entry" do
                        @idx.db.match_prefix("sequence:").size.should == 1
                      end
                      it "stores the sequence IDs" do
                        @idx.db.get("sequence:mm8.chr7").should == "0"
                      end
                      describe "loads sequence data correctly" do
                        before(:each) { @idx = @idx.reopen }
                        it "uses the first sequence appearing as the reference sequence" do
                          @idx.index_sequences.to_a.should == [["mm8.chr7", 0]]
                        end
                      end
                      after(:each) do
                        @idx.db.close
                        # Also release the parser's file handle, as the #find and
                        # #entries_for groups do; it was previously left open.
                        @p.f.close
                      end
                    end
                  end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
                  # Specs for KyotoIndex.open against the pre-built on-disk index
                  # shipped with the test data.
                  describe ".open" do
                    # Path of the pre-built index fixture.
                    def tiny_index_path
                      TestData + 'mm8_chr7_tiny.kct'
                    end

                    # Close the database only if the example managed to open it.
                    after(:each) do
                      @idx.db.close if @idx
                    end

                    it "opens an existing index successfully" do
                      @idx = KyotoIndex.open(tiny_index_path)
                      record_count = @idx.db.count
                      record_count.should be > 8
                    end

                    it "populates #index_sequences" do
                      @idx = KyotoIndex.open(tiny_index_path)
                      @idx.index_sequences.size.should be > 0
                      @idx.index_sequences['mm8.chr7'].should == 0
                    end
                  end
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
                  # Specs for KyotoIndex#find: resolving genomic intervals to parsed
                  # blocks using an index built in memory from the tiny mm8 file.
                  describe "#find" do
                    context "mm8_chr7" do
                      before(:each) do
                        @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                        @idx = KyotoIndex.build(@p, '%')
                      end

                      after(:each) do
                        @idx.db.close
                        @p.f.close
                      end

                      it "returns a block given a range contained in the block" do
                        query = GenomicInterval.zero_based('mm8.chr7', 80082334, 80082338)
                        blocks = @idx.find([query], @p).to_a
                        blocks.size.should == 1
                        blocks[0].offset.should == 16
                      end
                    end
                  end
         
     | 
| 
      
 96 
     | 
    
         
            +
             
     | 
| 
      
 97 
     | 
    
         
            +
                  # Specs for KyotoIndex#fetch_list: mapping zero-based genomic
                  # intervals to block specs. The examples read element 0 of a spec
                  # as the block offset and element 1 as the block size.
                  describe "#fetch_list" do
                    context "mm8_chr7" do
                      before(:each) do
                        @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                        @idx = KyotoIndex.build(@p, '%')
                      end

                      # Fetches the block specs for a single zero-based interval
                      # on mm8.chr7.
                      def fetch_specs(i_start, i_end)
                        @idx.fetch_list([GenomicInterval.zero_based('mm8.chr7',
                                                                    i_start,
                                                                    i_end)])
                      end

                      it "returns a block spec given a range contained in the block" do
                        l = fetch_specs(80082334, 80082338)
                        l.size.should == 1
                        l[0][0].should == 16 # block offset
                      end
                      it "returns a block spec with correct size" do
                        l = fetch_specs(80082334, 80082338)
                        l.size.should == 1
                        l[0][0].should == 16 # block offset
                        l[0][1].should == 1087 # block size
                      end
                      it "returns a block spec given its range exactly" do
                        l = fetch_specs(80082334, 80082368)
                        l.size.should == 1
                        l[0][0].should == 16 # block offset
                      end
                      it "returns specs for adjoining blocks given a range partially in each" do
                        l = fetch_specs(80082360, 80082370)
                        l.size.should == 2
                        l.collect { |e| e[0] }.should == [16, 1103]
                      end
                      it "returns a block spec given a range ending in it" do
                        l = fetch_specs(80082330, 80082339)
                        l.size.should == 1
                        l[0][0].should == 16 # block offset
                      end
                      it "returns no block spec given a zero-based range ending at a block start" do
                        l = fetch_specs(80082330, 80082334)
                        l.size.should == 0
                      end
                      it "returns a block spec given a range beginning in it" do
                        l = fetch_specs(80083009, 80083220)
                        l.size.should == 1
                        l[0][0].should == 10113 # block offset
                      end
                      it "returns no block spec given a range beginning at its end" do
                        l = fetch_specs(80083156, 80083200)
                        l.size.should == 0
                      end
                      it "returns specs for all blocks given a range fitting a larger bin" do
                        l = fetch_specs(0, 80083200)
                        l.size.should == 8
                      end
                      it "returns no blocks given a range outside" do
                        l = fetch_specs(80083200, 80083300)
                        # This example previously made no assertion at all, so it
                        # could never fail.
                        l.size.should == 0
                      end
                      after(:each) do
                        @idx.db.close if @idx
                        # Release the parser's file handle too; it was left open.
                        @p.f.close if @p
                      end
                    end
                  end
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                  # Specs for KyotoIndex#overlaps?, exercised on an empty in-memory
                  # index. Each case pairs a half-open range built from the first
                  # pair with the start and end values of the second pair.
                  describe "#overlaps?" do
                    before(:each) do
                      @idx = KyotoIndex.new('%')
                    end

                    after(:each) do
                      @idx.db.close
                    end

                    # Asks the index whether the end-exclusive range x[0]...x[1]
                    # overlaps the interval given by y[0] and y[1].
                    def check_overlap(x, y)
                      x_first, x_last = x[0], x[1]
                      @idx.overlaps?(Range.new(x_first, x_last, true), y[0], y[1])
                    end

                    it "handles equal intervals" do
                      check_overlap([0, 10], [0, 10]).should be_true
                    end

                    it "handles X contains Y" do
                      check_overlap([0, 10], [0, 9]).should be_true
                      check_overlap([0, 10], [1, 9]).should be_true
                      check_overlap([0, 10], [1, 10]).should be_true
                    end

                    it "handles Y contains X" do
                      check_overlap([0, 9], [0, 10]).should be_true
                      check_overlap([1, 9], [0, 10]).should be_true
                      check_overlap([1, 10], [0, 10]).should be_true
                    end

                    it "handles partial overlap" do
                      check_overlap([0, 9], [1, 10]).should be_true
                      check_overlap([1, 10], [0, 9]).should be_true
                    end

                    it "handles end cases" do
                      check_overlap([0, 10], [10, 15]).should be_false
                      check_overlap([10, 15], [0, 10]).should be_false
                    end

                    it "handles separated intervals" do
                      check_overlap([0, 10], [15, 20]).should be_false
                      check_overlap([15, 20], [0, 10]).should be_false
                    end
                  end
         
     | 
| 
      
 225 
     | 
    
         
            +
             
     | 
| 
      
 226 
     | 
    
         
            +
                  # Specs for KyotoIndex#entries_for: the packed key/value entries
                  # produced for the first block of the tiny mm8 file.
                  describe "#entries_for" do
                    before(:each) do
                      @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                      @block = @p.parse_block
                      @idx = KyotoIndex.new('%')
                    end

                    after(:each) do
                      @p.f.close
                      @idx.db.close
                    end

                    context "single ref seq" do
                      before(:each) do
                        @idx.index_sequences = { 'mm8.chr7' => 0 }
                        @e = @idx.entries_for(@block)
                      end

                      it "gives the correct key data" do
                        # Field 0 of the unpacked key is not checked here.
                        key_fields = @e.keys.first.unpack("CCS>L>L>")
                        key_fields[1].should == 0        # seq
                        key_fields[2].should == 1195     # bin
                        key_fields[3].should == 80082334 # i_start
                        key_fields[4].should == 80082368 # i_end
                      end

                      it "gives the correct offset" do
                        value_fields = @e.values.first.unpack("Q>L>")
                        value_fields[0].should == 16
                      end

                      it "gives the correct length" do
                        value_fields = @e.values.first.unpack("Q>L>")
                        value_fields[1].should == 1087
                      end
                    end
                  end
         
     | 
| 
      
 258 
     | 
    
         
            +
             
     | 
| 
      
 259 
     | 
    
         
            +
                end
         
     | 
| 
      
 260 
     | 
    
         
            +
             
     | 
| 
      
 261 
     | 
    
         
            +
                describe "#species" do
                  # Build a fresh index from the tiny fixture for every example.
                  # NOTE(review): '%' presumably selects an in-memory Kyoto Cabinet
                  # database -- confirm.
                  before(:each) do
                    @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                    @idx = KyotoIndex.build(@p, '%')
                  end

                  # Close the index after every example so database handles do not
                  # accumulate across the suite, as the other groups in this file do.
                  after(:each) do
                    @idx.close
                  end

                  # Expectations that must hold both for a freshly built index and
                  # for one that has been reopened.
                  shared_examples "species" do
                    it "records the correct number of species" do
                      @idx.species.size.should == 11
                    end
                    it "sets species_max_id correctly" do
                      @idx.species_max_id.should == 10
                    end
                  end

                  describe "after building index" do
                    include_examples "species"
                    it "records species in order" do
                      # The species id is read back from the database as a string.
                      @idx.db["species:mm8"].should == "0"
                    end
                  end

                  describe "after loading index" do
                    # Swap in the reopened index before running the shared examples.
                    before(:each) { @idx = @idx.reopen }
                    include_examples "species"
                  end
                end
         
     | 
| 
      
 285 
     | 
    
         
            +
             
     | 
| 
      
 286 
     | 
    
         
            +
                describe "Filter classes" do
                  # Build a fresh index from the tiny fixture for every example; the
                  # filters under test are constructed against it.
                  before(:each) do
                    @p = Parser.new(TestData + 'mm8_chr7_tiny.maf')
                    @idx = KyotoIndex.build(@p, '%')
                  end

                  describe AllSpeciesFilter do
                    # Builds a fake index entry: a [key, value] pair with an empty
                    # key and a value packed with KyotoIndex::VAL_FMT whose fifth
                    # field is a bit vector with one bit set per given species.
                    # Species names are resolved to ids with @idx.species.fetch.
                    def fake_entry_with(species_l)
                      ids = species_l.map { |s| @idx.species.fetch(s) }
                      vec = ids.map { |id| 1 << id }.reduce(0, :|)
                      ['', [0, 0, 0, 0, vec].pack(KyotoIndex::VAL_FMT)]
                    end

                    context "with an empty set" do
                      before(:each) do
                        @filter = AllSpeciesFilter.new([], @idx)
                      end
                      it "matches anything" do
                        e = fake_entry_with(%w(mm8 rn4 oryCun1))
                        @filter.match(e).should be_true
                      end
                    end

                    context "with [mm8 rn4]" do
                      before(:each) do
                        @filter = AllSpeciesFilter.new(%w(mm8 rn4), @idx)
                      end
                      it "does not match an empty entry" do
                        e = fake_entry_with(%w())
                        # Sanity checks: the entry's species vector is empty while
                        # the filter's own bit set is not.
                        KVHelpers.extract_species_vec(e).should == 0
                        @filter.bs.should_not == 0
                        @filter.match(e).should be_false
                      end
                      it "does not match an entry with mm8" do
                        e = fake_entry_with(%w(mm8))
                        @filter.match(e).should be_false
                      end
                      it "does not match an entry with mm8 oryCun1" do
                        e = fake_entry_with(%w(mm8 oryCun1))
                        @filter.match(e).should be_false
                      end
                      it "matches an entry with mm8 rn4" do
                        e = fake_entry_with(%w(mm8 rn4))
                        @filter.match(e).should be_true
                      end
                      # An entry carrying extra species still contains every
                      # required one, so the filter is expected to match.
                      it "matches an entry with mm8 rn4 oryCun1" do
                        e = fake_entry_with(%w(mm8 rn4 oryCun1))
                        @filter.match(e).should be_true
                      end
                    end
                  end # AllSpeciesFilter

                  describe AtLeastNSequencesFilter do
                    # Builds a fake index entry: a [key, value] pair with an empty
                    # key and a value packed with KyotoIndex::VAL_FMT whose fourth
                    # field is n.
                    # NOTE(review): the fourth field is presumably the sequence
                    # count the filter reads -- confirm against VAL_FMT.
                    def fake_entry_with(n)
                      ['', [0, 0, 0, n, 0].pack(KyotoIndex::VAL_FMT)]
                    end

                    context "n = 3" do
                      before(:each) do
                        @filter = AtLeastNSequencesFilter.new(3, @idx)
                      end
                      it "does not match 2 sequences" do
                        e = fake_entry_with(2)
                        @filter.match(e).should be_false
                      end
                      it "matches 3 sequences" do
                        e = fake_entry_with(3)
                        @filter.match(e).should be_true
                      end
                    end
                  end # AtLeastNSequencesFilter

                  # Close the index built in the before hook.
                  after(:each) do
                    @idx.close
                  end
                end # filter classes
         
     | 
| 
      
 360 
     | 
    
         
            +
             
     | 
| 
      
 361 
     | 
    
         
            +
              end # module MAF
         
     | 
| 
      
 362 
     | 
    
         
            +
              
         
     | 
| 
      
 363 
     | 
    
         
            +
            end # module Bio
         
     |