bio-maf 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
 - data/Gemfile +2 -1
 - data/README.md +98 -29
 - data/Rakefile +6 -2
 - data/bin/maf_tile +59 -35
 - data/bio-maf.gemspec +4 -3
 - data/features/block-joining.feature +32 -0
 - data/features/dir-access.feature +46 -0
 - data/features/maf-indexing.feature +23 -0
 - data/features/maf-to-fasta.feature +9 -0
 - data/features/slice.feature +54 -0
 - data/features/step_definitions/dir-access_steps.rb +15 -0
 - data/features/step_definitions/file_steps.rb +7 -0
 - data/features/step_definitions/gap_removal_steps.rb +4 -0
 - data/features/step_definitions/index_steps.rb +3 -3
 - data/features/step_definitions/output_steps.rb +9 -1
 - data/features/step_definitions/parse_steps.rb +13 -2
 - data/features/step_definitions/query_steps.rb +7 -6
 - data/features/step_definitions/slice_steps.rb +15 -0
 - data/features/step_definitions/{gap-filling_steps.rb → tiling_steps.rb} +0 -0
 - data/features/support/aruba.rb +1 -0
 - data/features/support/env.rb +3 -1
 - data/features/{gap-filling.feature → tiling.feature} +85 -0
 - data/lib/bio/maf/index.rb +223 -11
 - data/lib/bio/maf/maf.rb +209 -0
 - data/lib/bio/maf/parser.rb +190 -111
 - data/lib/bio/maf/tiler.rb +33 -6
 - data/man/maf_index.1 +1 -1
 - data/man/maf_tile.1 +7 -7
 - data/man/maf_tile.1.ronn +21 -13
 - data/man/maf_to_fasta.1 +1 -1
 - data/spec/bio/maf/index_spec.rb +99 -0
 - data/spec/bio/maf/maf_spec.rb +184 -0
 - data/spec/bio/maf/parser_spec.rb +75 -115
 - data/spec/bio/maf/tiler_spec.rb +44 -0
 - data/test/data/chr22_ieq2.maf +11 -0
 - data/test/data/gap-1.kct +0 -0
 - data/test/data/gap-1.maf +9 -0
 - data/test/data/gap-filled1.fa +6 -0
 - data/test/data/gap-sp1.fa.gz +0 -0
 - data/test/data/mm8_chr7_tiny_slice1.maf +9 -0
 - data/test/data/mm8_chr7_tiny_slice2.maf +10 -0
 - data/test/data/mm8_chr7_tiny_slice3.maf +10 -0
 - data/test/data/mm8_chrM_tiny.kct +0 -0
 - data/test/data/mm8_chrM_tiny.maf +1000 -0
 - metadata +59 -7
 
    
        data/lib/bio/maf/maf.rb
    CHANGED
    
    | 
         @@ -1,4 +1,15 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'bio-alignment'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
       1 
3 
     | 
    
         
             
            module Bio
         
     | 
| 
      
 4 
     | 
    
         
            +
              # Reopens GenomicInterval to add an intersection operation.
              class GenomicInterval
                # Returns a new interval spanning the region shared by this
                # interval and +other+: the larger of the two starts and the
                # smaller of the two ends.
                #
                # NOTE(review): no overlap check is performed here, so for
                # disjoint intervals the computed start exceeds the end;
                # callers are expected to test for overlap first.
                #
                # @param [GenomicInterval] other interval on the same chromosome
                # @return [GenomicInterval] the shared region
                # @raise [ArgumentError] if the intervals are on different chromosomes
                def intersection(other)
                  unless chrom == other.chrom
                    raise ArgumentError,
                          "cannot intersect intervals on different chromosomes: #{chrom} and #{other.chrom}"
                  end
                  GenomicInterval.new(chrom,
                                      [chr_start, other.chr_start].max,
                                      [chr_end, other.chr_end].min)
                end
              end
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       2 
13 
     | 
    
         
             
              module MAF
         
     | 
| 
       3 
14 
     | 
    
         | 
| 
       4 
15 
     | 
    
         
             
                # A MAF header, containing the variable-value pairs from the first
         
     | 
| 
         @@ -12,6 +23,12 @@ module Bio 
     | 
|
| 
       12 
23 
     | 
    
         
             
                  # @return [Hash]
         
     | 
| 
       13 
24 
     | 
    
         
             
                  attr_accessor :alignment_params
         
     | 
| 
       14 
25 
     | 
    
         | 
| 
      
 26 
     | 
    
         
            +
                  # Create a default header with version=1.
         
     | 
| 
      
 27 
     | 
    
         
            +
                  # @return [Header]
         
     | 
| 
      
 28 
     | 
    
         
            +
                  def self.default
                    # Only the version variable is set; no alignment
                    # parameters are supplied.
                    default_vars = { :version => 1 }
                    Header.new(default_vars, nil)
                  end
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
       15 
32 
     | 
    
         
             
                  def initialize(vars, params)
         
     | 
| 
       16 
33 
     | 
    
         
             
                    @vars = vars
         
     | 
| 
       17 
34 
     | 
    
         
             
                    @alignment_params = params
         
     | 
| 
         @@ -47,6 +64,7 @@ module Bio 
     | 
|
| 
       47 
64 
     | 
    
         
             
                  attr_reader :size
         
     | 
| 
       48 
65 
     | 
    
         | 
| 
       49 
66 
     | 
    
         
             
                  def initialize(vars, sequences, offset, size, filtered)
         
     | 
| 
      
 67 
     | 
    
         
            +
                    #raise ArgumentError, "no sequences given for block at offset #{offset}!" unless sequences && sequences.first
         
     | 
| 
       50 
68 
     | 
    
         
             
                    @vars = vars
         
     | 
| 
       51 
69 
     | 
    
         
             
                    @sequences = sequences
         
     | 
| 
       52 
70 
     | 
    
         
             
                    @offset = offset
         
     | 
| 
         @@ -79,6 +97,11 @@ module Bio 
     | 
|
| 
       79 
97 
     | 
    
         
             
                    @filtered
         
     | 
| 
       80 
98 
     | 
    
         
             
                  end
         
     | 
| 
       81 
99 
     | 
    
         | 
| 
      
 100 
     | 
    
         
            +
                  # Builds a Bio::BioAlignment::Alignment from this block by
                  # converting each sequence with its own #to_bio_alignment.
                  #
                  # @return [Bio::BioAlignment::Alignment]
                  def to_bio_alignment
                    converted = sequences.map(&:to_bio_alignment)
                    Bio::BioAlignment::Alignment.new(converted)
                  end
         
     | 
| 
      
 104 
     | 
    
         
            +
             
     | 
| 
       82 
105 
     | 
    
         
             
                  GAP = /-+/
         
     | 
| 
       83 
106 
     | 
    
         | 
| 
       84 
107 
     | 
    
         
             
                  # Remove gaps present in all sequences. These would generally
         
     | 
| 
         @@ -116,6 +139,96 @@ module Bio 
     | 
|
| 
       116 
139 
     | 
    
         
             
                    gaps.size
         
     | 
| 
       117 
140 
     | 
    
         
             
                  end
         
     | 
| 
       118 
141 
     | 
    
         | 
| 
      
 142 
     | 
    
         
            +
                  # Returns a new Block covering only the region where it overlaps
         
     | 
| 
      
 143 
     | 
    
         
            +
                  # the given interval.
         
     | 
| 
      
 144 
     | 
    
         
            +
                  # @param [Bio::GenomicInterval] interval to slice the block with
         
     | 
| 
      
 145 
     | 
    
         
            +
                  # @return [Block] block covering intersection with interval
         
     | 
| 
      
 146 
     | 
    
         
            +
                  def slice(interval)
                    # How the requested interval relates to the reference
                    # sequence's interval decides what happens next.
                    relation = interval.compare(ref_seq.interval)
                    case relation
                    when :equal
                      # Exact match: nothing to trim.
                      self
                    when :contains, :contained_by, :left_overlapped, :right_overlapped
                      overlap = interval.intersection(ref_seq.interval)
                      _slice(overlap)
                    when :left_adjacent, :right_adjacent, :left_off, :right_off
                      raise "Cannot slice a block with a non-overlapping interval! Block #{ref_seq.interval}, interval #{interval}"
                    when :different_chrom
                      raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
                    else
                      raise "Unhandled comparison result: #{relation}"
                    end
                  end
         
     | 
| 
      
 160 
     | 
    
         
            +
             
     | 
| 
      
 161 
     | 
    
         
            +
                  # Builds a new Block whose sequences are each cut down to
                  # the text columns that correspond to +interval+ on the
                  # reference sequence. The variables hash is copied; offset,
                  # size and the filtered flag are passed through as-is.
                  #
                  # @param interval interval already known to overlap the block
                  # @return [Block]
                  def _slice(interval)
                    text_range = _slice_text_range(interval)
                    trimmed = sequences.map { |seq| seq.slice(text_range) }
                    # TODO: should the filtered param be #modified? instead?
                    Block.new(vars.dup, trimmed, offset, size, @filtered)
                  end
         
     | 
| 
      
 169 
     | 
    
         
            +
             
     | 
| 
      
 170 
     | 
    
         
            +
                  # Translates a genomic interval on the reference sequence
                  # into a range of text (column) offsets. Walks the reference
                  # text, advancing the genomic position only on non-gap
                  # characters.
                  #
                  # @param interval object responding to #zero_start and #zero_end
                  # @return [Range] end-exclusive range of text offsets
                  # @raise [RuntimeError] if either boundary is never reached
                  def _slice_text_range(interval)
                    first_base = interval.zero_start
                    past_last  = interval.zero_end
                    genome_pos = ref_seq.start
                    text_from  = nil
                    text_to    = nil
                    ref_seq.text.each_char.with_index do |char, idx|
                      # gap columns do not consume a genomic position
                      next if char == '-'
                      text_from = idx if genome_pos == first_base
                      genome_pos += 1
                      if text_from && genome_pos == past_last
                        text_to = idx + 1
                        break
                      end
                    end
                    if text_from.nil? || text_to.nil?
                      raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
                    end
                    text_from...text_to
                  end
         
     | 
| 
      
 194 
     | 
    
         
            +
             
     | 
| 
      
 195 
     | 
    
         
            +
                  # Whether +other+ can be appended directly to this block.
                  #
                  # Requires the same number of sequences, the same reference
                  # source, the other reference sequence starting exactly where
                  # this one ends, and every sequence here having a counterpart
                  # from the same source in +other+ that it reports as joinable.
                  #
                  # The previous implementation called Enumerator#next before
                  # #find, apparently to skip the reference sequence; external
                  # enumeration does not affect #find, so every sequence was
                  # (and still is) checked.
                  #
                  # @param [Block] other candidate following block
                  # @return [Boolean]
                  def joinable_with?(other)
                    return false unless sequences.size == other.sequences.size

                    r1 = ref_seq
                    r2 = other.ref_seq
                    return false if r1.source != r2.source
                    return false if r1.end != r2.start

                    sequences.each_with_index.all? do |s1, i|
                      s2 = other.seq_from(s1.source, i)
                      s2 && s1.joinable_with?(s2)
                    end
                  end
         
     | 
| 
      
 212 
     | 
    
         
            +
             
     | 
| 
      
 213 
     | 
    
         
            +
                  # Concatenates this block with +other+, pairing each
                  # sequence with the one from the same source in +other+.
                  # The result carries a copy of this block's variables with
                  # the score reset to '0.0', this block's offset, and no size.
                  #
                  # @param [Block] other block to append
                  # @return [Block]
                  def join(other)
                    merged = sequences.each_with_index.map do |seq, idx|
                      seq.join(other.seq_from(seq.source, idx))
                    end
                    joined_vars = vars.dup
                    joined_vars[:score] = '0.0'
                    Block.new(joined_vars, merged, offset, nil, @filtered)
                  end
         
     | 
| 
      
 222 
     | 
    
         
            +
             
     | 
| 
      
 223 
     | 
    
         
            +
                  # Finds the sequence with the given source, trying the
                  # position +pos_guess+ first and falling back to a search
                  # over all sequences.
                  #
                  # An out-of-range +pos_guess+ no longer raises
                  # NoMethodError on nil; it simply triggers the fallback.
                  #
                  # @param src source name to look for
                  # @param [Integer] pos_guess index at which the sequence is expected
                  # @return the matching sequence, or nil if none has that source
                  def seq_from(src, pos_guess)
                    guess = sequences[pos_guess]
                    return guess if guess && guess.source == src

                    sequences.find { |s| s.source == src }
                  end
         
     | 
| 
      
 231 
     | 
    
         
            +
             
     | 
| 
       119 
232 
     | 
    
         
             
                end
         
     | 
| 
       120 
233 
     | 
    
         | 
| 
       121 
234 
     | 
    
         
             
                # A sequence within an alignment block.
         
     | 
| 
         @@ -153,6 +266,29 @@ module Bio 
     | 
|
| 
       153 
266 
     | 
    
         
             
                    start + size
         
     | 
| 
       154 
267 
     | 
    
         
             
                  end
         
     | 
| 
       155 
268 
     | 
    
         | 
| 
      
 269 
     | 
    
         
            +
                  # The region covered by this sequence, built with
                  # GenomicInterval.zero_based from its source, start and end.
                  def interval
                    src = source
                    first = start
                    last = self.end
                    GenomicInterval.zero_based(src, first, last)
                  end
         
     | 
| 
      
 272 
     | 
    
         
            +
             
     | 
| 
      
 273 
     | 
    
         
            +
                  # Returns a new Sequence limited to the given range of text
                  # offsets. The new start is advanced by the number of
                  # non-gap characters preceding the range, and the new size
                  # is the number of non-gap characters inside it. Quality
                  # data, when present, is sliced with the same range.
                  #
                  # @param [Range] range text offsets to keep
                  # @return [Sequence]
                  # @raise [RuntimeError] if the range cannot be extracted from the text
                  def slice(range)
                    leading_bases = text.slice(0...(range.begin)).delete("-").size
                    piece = text.slice(range)
                    raise "could not extract slice #{range} from #{self.inspect}!" unless piece

                    sliced = Sequence.new(source,
                                          start + leading_bases,
                                          piece.delete("-").size,
                                          strand,
                                          src_size,
                                          piece)
                    sliced.quality = quality.slice(range) if quality
                    # TODO: what to do with synteny data?
                    sliced
                  end
         
     | 
| 
      
 291 
     | 
    
         
            +
             
     | 
| 
       156 
292 
     | 
    
         
             
                  # Whether this sequence is empty. Only true for {EmptySequence}
         
     | 
| 
       157 
293 
     | 
    
         
             
                  # instances from 'e' lines.
         
     | 
| 
       158 
294 
     | 
    
         
             
                  def empty?
         
     | 
| 
         @@ -163,6 +299,43 @@ module Bio 
     | 
|
| 
       163 
299 
     | 
    
         
             
                    size != text.size
         
     | 
| 
       164 
300 
     | 
    
         
             
                  end
         
     | 
| 
       165 
301 
     | 
    
         | 
| 
      
 302 
     | 
    
         
            +
                  # Maps single status characters to status symbols; used by
                  # #decode_status_char. Frozen so the shared table cannot be
                  # modified at runtime.
                  I_STATUS = {
                    'C' => :contiguous,
                    'I' => :intervening,
                    'N' => :first,
                    'n' => :first_bridged,
                    'M' => :missing_data,
                    'T' => :tandem
                  }.freeze
         
     | 
| 
      
 310 
     | 
    
         
            +
             
     | 
| 
      
 311 
     | 
    
         
            +
                  # Looks up the status symbol for a status character in
                  # I_STATUS.
                  #
                  # @param [String] c status character
                  # @return [Symbol]
                  # @raise [RuntimeError] if the character is not in I_STATUS
                  def decode_status_char(c)
                    status = I_STATUS[c]
                    raise("Unsupported status character #{c}!") unless status

                    status
                  end
         
     | 
| 
      
 314 
     | 
    
         
            +
             
     | 
| 
      
 315 
     | 
    
         
            +
                  # First element of i_data, or i_data itself when it is
                  # nil/false.
                  def left_status_char
                    data = i_data
                    return data unless data

                    data[0]
                  end
         
     | 
| 
      
 318 
     | 
    
         
            +
             
     | 
| 
      
 319 
     | 
    
         
            +
                  # Decoded form of #left_status_char, or i_data itself when
                  # it is nil/false.
                  def left_status
                    data = i_data
                    return data unless data

                    decode_status_char(left_status_char)
                  end
         
     | 
| 
      
 322 
     | 
    
         
            +
             
     | 
| 
      
 323 
     | 
    
         
            +
                  # Second element of i_data converted with #to_i, or i_data
                  # itself when it is nil/false.
                  def left_count
                    data = i_data
                    return data unless data

                    data[1].to_i
                  end
         
     | 
| 
      
 326 
     | 
    
         
            +
             
     | 
| 
      
 327 
     | 
    
         
            +
                  # Third element of i_data, or i_data itself when it is
                  # nil/false.
                  def right_status_char
                    data = i_data
                    return data unless data

                    data[2]
                  end
         
     | 
| 
      
 330 
     | 
    
         
            +
             
     | 
| 
      
 331 
     | 
    
         
            +
                  # Decoded form of #right_status_char, or i_data itself when
                  # it is nil/false.
                  def right_status
                    data = i_data
                    return data unless data

                    decode_status_char(right_status_char)
                  end
         
     | 
| 
      
 334 
     | 
    
         
            +
             
     | 
| 
      
 335 
     | 
    
         
            +
                  # Fourth element of i_data converted with #to_i, or i_data
                  # itself when it is nil/false.
                  def right_count
                    data = i_data
                    return data unless data

                    data[3].to_i
                  end
         
     | 
| 
      
 338 
     | 
    
         
            +
             
     | 
| 
       166 
339 
     | 
    
         
             
                  def species
         
     | 
| 
       167 
340 
     | 
    
         
             
                    parts = source.split('.', 2)
         
     | 
| 
       168 
341 
     | 
    
         
             
                    parts.size == 2 ? parts[0] : nil
         
     | 
| 
         @@ -177,11 +350,34 @@ module Bio 
     | 
|
| 
       177 
350 
     | 
    
         
             
                    end
         
     | 
| 
       178 
351 
     | 
    
         
             
                  end
         
     | 
| 
       179 
352 
     | 
    
         | 
| 
      
 353 
     | 
    
         
            +
                  # Wraps this sequence's source and text in a
                  # Bio::BioAlignment::Sequence.
                  #
                  # @return [Bio::BioAlignment::Sequence]
                  def to_bio_alignment
                    label = source
                    residues = text
                    Bio::BioAlignment::Sequence.new(label, residues)
                  end
         
     | 
| 
      
 356 
     | 
    
         
            +
             
     | 
| 
       180 
357 
     | 
    
         
             
                  def write_fasta(writer)
         
     | 
| 
       181 
358 
     | 
    
         
             
                    writer.write("#{source}:#{start}-#{start + size}",
         
     | 
| 
       182 
359 
     | 
    
         
             
                                 text)
         
     | 
| 
       183 
360 
     | 
    
         
             
                  end
         
     | 
| 
       184 
361 
     | 
    
         | 
| 
      
 362 
     | 
    
         
            +
                  # Whether +o+ can be appended to this sequence: it must
                  # start where this one ends, be on the same strand, and
                  # agree with this sequence on #empty?.
                  #
                  # @param o the candidate following sequence
                  # @return [Boolean]
                  def joinable_with?(o)
                    return false unless self.end == o.start
                    return false unless strand == o.strand

                    empty? == o.empty?
                  end
         
     | 
| 
      
 367 
     | 
    
         
            +
             
     | 
| 
      
 368 
     | 
    
         
            +
                  # Returns a new Sequence made of this sequence followed by
                  # +o+: same source, start, strand and source size; sizes
                  # summed; texts concatenated. Quality data is concatenated
                  # only when both sequences have it.
                  #
                  # @param o the sequence to append
                  # @return [Sequence]
                  def join(o)
                    joined = Sequence.new(source,
                                          start,
                                          size + o.size,
                                          strand,
                                          src_size,
                                          text + o.text)
                    joined.quality = quality + o.quality if quality && o.quality
                    joined
                  end
         
     | 
| 
      
 380 
     | 
    
         
            +
             
     | 
| 
       185 
381 
     | 
    
         
             
                  # Maps the given zero-based genomic range onto a range of string
         
     | 
| 
       186 
382 
     | 
    
         
             
                  # offsets, suitable for extracting the text for the given range
         
     | 
| 
       187 
383 
     | 
    
         
             
                  # from #text.
         
     | 
| 
         @@ -253,6 +449,19 @@ module Bio 
     | 
|
| 
       253 
449 
     | 
    
         
             
                    ''
         
     | 
| 
       254 
450 
     | 
    
         
             
                  end
         
     | 
| 
       255 
451 
     | 
    
         | 
| 
      
 452 
     | 
    
         
            +
                  # Slicing has no effect here: the receiver is returned
                  # unchanged and the arguments are ignored.
                  #
                  # +len+ is optional so that this method can also be called
                  # with a single Range argument, which is how Block#_slice
                  # invokes #slice on each of its sequences; the former
                  # two-argument-only signature raised ArgumentError there.
                  #
                  # @param offset ignored (a Range or a start offset)
                  # @param len ignored
                  # @return [self]
                  def slice(offset, len = nil)
                    self
                  end
         
     | 
| 
      
 455 
     | 
    
         
            +
             
     | 
| 
      
 456 
     | 
    
         
            +
                  # Returns a new EmptySequence spanning this one and +o+:
                  # same source, start, strand, source size and status, with
                  # the two sizes added together.
                  #
                  # @param o the sequence to append
                  # @return [EmptySequence]
                  def join(o)
                    combined_size = size + o.size
                    EmptySequence.new(source, start, combined_size, strand, src_size, @status)
                  end
         
     | 
| 
      
 464 
     | 
    
         
            +
             
     | 
| 
       256 
465 
     | 
    
         
             
                  def empty?
         
     | 
| 
       257 
466 
     | 
    
         
             
                    true
         
     | 
| 
       258 
467 
     | 
    
         
             
                  end
         
     | 
    
        data/lib/bio/maf/parser.rb
    CHANGED
    
    | 
         @@ -150,7 +150,7 @@ module Bio 
     | 
|
| 
       150 
150 
     | 
    
         
             
                  #
         
     | 
| 
       151 
151 
     | 
    
         
             
                  # @return [Block] alignment block
         
     | 
| 
       152 
152 
     | 
    
         
             
                  # @api public
         
     | 
| 
       153 
     | 
    
         
            -
                  def  
     | 
| 
      
 153 
     | 
    
         
            +
                  def _parse_block
         
     | 
| 
       154 
154 
     | 
    
         
             
                    return nil if at_end
         
     | 
| 
       155 
155 
     | 
    
         
             
                    if s.pos != last_block_pos
         
     | 
| 
       156 
156 
     | 
    
         
             
                      # in non-trailing block
         
     | 
| 
         @@ -296,19 +296,11 @@ module Bio 
     | 
|
| 
       296 
296 
     | 
    
         
             
                        parse_error "unexpected line: '#{line}'"
         
     | 
| 
       297 
297 
     | 
    
         
             
                      end
         
     | 
| 
       298 
298 
     | 
    
         
             
                    end
         
     | 
| 
       299 
     | 
    
         
            -
                     
     | 
| 
       300 
     | 
    
         
            -
             
     | 
| 
       301 
     | 
    
         
            -
             
     | 
| 
       302 
     | 
    
         
            -
             
     | 
| 
       303 
     | 
    
         
            -
             
     | 
| 
       304 
     | 
    
         
            -
                    postprocess_block(block)
         
     | 
| 
       305 
     | 
    
         
            -
                  end
         
     | 
| 
       306 
     | 
    
         
            -
             
     | 
| 
       307 
     | 
    
         
            -
                  def postprocess_block(block)
         
     | 
| 
       308 
     | 
    
         
            -
                    if block.filtered? && opts[:remove_gaps]
         
     | 
| 
       309 
     | 
    
         
            -
                      block.remove_gaps!
         
     | 
| 
       310 
     | 
    
         
            -
                    end
         
     | 
| 
       311 
     | 
    
         
            -
                    block
         
     | 
| 
      
 299 
     | 
    
         
            +
                    Block.new(block_vars,
         
     | 
| 
      
 300 
     | 
    
         
            +
                              seqs,
         
     | 
| 
      
 301 
     | 
    
         
            +
                              block_offset,
         
     | 
| 
      
 302 
     | 
    
         
            +
                              s.pos - block_start_pos,
         
     | 
| 
      
 303 
     | 
    
         
            +
                              filtered)
         
     | 
| 
       312 
304 
     | 
    
         
             
                  end
         
     | 
| 
       313 
305 
     | 
    
         | 
| 
       314 
306 
     | 
    
         
             
                  # Parse an 's' line.
         
     | 
| 
         @@ -419,19 +411,21 @@ module Bio 
     | 
|
| 
       419 
411 
     | 
    
         
             
                  # @param [Array] block_offsets Offsets of blocks to parse.
         
     | 
| 
       420 
412 
     | 
    
         
             
                  # @return [Array<Block>]
         
     | 
| 
       421 
413 
     | 
    
         
             
                  def fetch_blocks(offset, len, block_offsets)
                    # Without a block, hand back an enumerator over the same call.
                    return enum_for(:fetch_blocks, offset, len, block_offsets) unless block_given?

                    start_chunk_read_if_needed(offset, len)
                    # Read chunks until the entire merged set of blocks is ready
                    # to parse, which avoids joining block fragments.
                    append_chunks_to(len)
                    block_offsets.each do |wanted|
                      parsed = _parse_block
                      unless parsed
                        parse_error("expected a block at offset #{wanted} but could not parse one!")
                      end
                      if parsed.offset != wanted
                        parse_error("got block with offset #{parsed.offset}, expected #{wanted}!")
                      end
                      yield parsed
                    end
                  end
         
     | 
| 
       437 
431 
     | 
    
         | 
| 
         @@ -530,6 +524,10 @@ module Bio 
     | 
|
| 
       530 
524 
     | 
    
         
             
                    _parse_header()
         
     | 
| 
       531 
525 
     | 
    
         
             
                  end
         
     | 
| 
       532 
526 
     | 
    
         | 
| 
      
 527 
     | 
    
         
            +
                  # Close the object returned by `f`.
                  def close
                    f.close
                  end
         
     | 
| 
      
 530 
     | 
    
         
            +
             
     | 
| 
       533 
531 
     | 
    
         
             
                  # Create a {ParseContext} for random access, using the given
         
     | 
| 
       534 
532 
     | 
    
         
             
                  # chunk size.
         
     | 
| 
       535 
533 
     | 
    
         
             
                  #
         
     | 
| 
         @@ -574,13 +572,19 @@ module Bio 
     | 
|
| 
       574 
572 
     | 
    
         
             
                  # `fetch_list` should be an array of `[offset, length]` tuples.
         
     | 
| 
       575 
573 
     | 
    
         
             
                  #
         
     | 
| 
       576 
574 
     | 
    
         
             
                  # @param [Array] fetch_list the fetch list
         
     | 
| 
       577 
     | 
    
         
            -
                  # @ 
     | 
| 
       578 
     | 
    
         
            -
                   
     | 
| 
       579 
     | 
    
         
            -
             
     | 
| 
       580 
     | 
    
         
            -
                    if  
     | 
| 
       581 
     | 
    
         
            -
                       
     | 
| 
      
 575 
     | 
    
         
            +
                  # @yield [block] each block matched, in turn
         
     | 
| 
      
 576 
     | 
    
         
            +
                  # @return [Enumerable<Block>] each matching {Block}, if no block given
         
     | 
| 
      
 577 
     | 
    
         
            +
                  def fetch_blocks(fetch_list, &blk)
                    return enum_for(:fetch_blocks, fetch_list) unless blk

                    merged = merge_fetch_list(fetch_list)
                    # The parallel path is taken only on JRuby with more than one
                    # thread requested.
                    use_threads = RUBY_PLATFORM == 'java' && @opts.fetch(:threads, 1) > 1
                    fetcher = if use_threads
                                lambda { |&sink| fetch_blocks_merged_parallel(merged, &sink) }
                              else
                                lambda { |&sink| fetch_blocks_merged(merged, &sink) }
                              end
                    wrap_block_seq(fetcher, &blk)
                  end
         
     | 
| 
       586 
590 
     | 
    
         | 
| 
         @@ -588,23 +592,19 @@ module Bio 
     | 
|
| 
       588 
592 
     | 
    
         
             
                  #
         
     | 
| 
       589 
593 
     | 
    
         
             
                  # @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
         
     | 
| 
       590 
594 
     | 
    
         
             
                  # @return [Array<Block>] the requested alignment blocks
         
     | 
| 
       591 
     | 
    
         
            -
                  def fetch_blocks_merged(fetch_list)
         
     | 
| 
       592 
     | 
    
         
            -
                     
     | 
| 
       593 
     | 
    
         
            -
             
     | 
| 
       594 
     | 
    
         
            -
             
     | 
| 
       595 
     | 
    
         
            -
                       
     | 
| 
       596 
     | 
    
         
            -
                         
     | 
| 
       597 
     | 
    
         
            -
                          ctx.fetch_blocks(*e).each do |block|
         
     | 
| 
       598 
     | 
    
         
            -
                            y << block
         
     | 
| 
       599 
     | 
    
         
            -
                            #total_size += block.size
         
     | 
| 
       600 
     | 
    
         
            -
                          end
         
     | 
| 
       601 
     | 
    
         
            -
                        end
         
     | 
| 
      
 595 
     | 
    
         
            +
                  def fetch_blocks_merged(fetch_list, &blk)
                    started = Time.now
                    with_context(@random_access_chunk_size) do |ctx|
                      # Each fetch-list entry is splatted into the context's
                      # fetch_blocks; parsed blocks go straight to the caller's block.
                      fetch_list.each { |entry| ctx.fetch_blocks(*entry, &blk) }
                    end
                    # TODO: debug log of elapsed time and MB/s. The size total
                    # (sum of each entry's second element) is no longer computed
                    # eagerly, since nothing consumed it.
                    # The elapsed time in seconds remains the return value.
                    Time.now - started
                  end
         
     | 
| 
       609 
609 
     | 
    
         | 
| 
       610 
610 
     | 
    
         
             
                  # Fetch and parse the blocks given by the merged fetch list, in
         
     | 
| 
         @@ -614,40 +614,38 @@ module Bio 
     | 
|
| 
       614 
614 
     | 
    
         
             
                  # @param [Array] fetch_list merged fetch list from {#merge_fetch_list}.
         
     | 
| 
       615 
615 
     | 
    
         
             
                  # @return [Array<Block>] the requested alignment blocks
         
     | 
| 
       616 
616 
     | 
    
         
             
                  def fetch_blocks_merged_parallel(fetch_list)
                    total_size = fetch_list.map { |entry| entry[1] }.reduce(:+)
                    start = Time.now
                    n_threads = @opts.fetch(:threads, 1)
                    # TODO: break entries up into longer runs for more
                    # sequential I/O
                    jobs = java.util.concurrent.ConcurrentLinkedQueue.new(fetch_list)
                    tracker = CompletionTracker.new(fetch_list)
                    completed = tracker.queue
                    workers = Array.new(n_threads) { make_worker(jobs, tracker) }

                    # One result is expected on the completion queue per fetch-list
                    # entry; poll with a timeout so a dead worker is noticed.
                    received = 0
                    until received >= fetch_list.size
                      batch = completed.poll(1, java.util.concurrent.TimeUnit::SECONDS)
                      if batch
                        batch.each { |block| yield block }
                        received += 1
                      elsif workers.any? { |t| t.status.nil? }
                        # Thread#status is nil once a thread has died from an exception.
                        raise "Worker failed!"
                      end
                    end
                    workers.each(&:join)
                    elapsed = Time.now - start
                    $stderr.printf("Fetched blocks from %d threads in %.1fs.\n",
                                   n_threads,
                                   elapsed)
                    mb = total_size / 1048576.0
                    $stderr.printf("%.3f MB processed (%.1f MB/s).\n",
                                   mb,
                                   mb / elapsed)
                  end
         
     | 
| 
       652 
650 
     | 
    
         | 
| 
       653 
651 
     | 
    
         
             
                  # Create a worker thread for parallel parsing.
         
     | 
| 
         @@ -721,30 +719,119 @@ module Bio 
     | 
|
| 
       721 
719 
     | 
    
         
             
                  # Delegates to {#parse_blocks_parallel} if `:threads` is set
         
     | 
| 
       722 
720 
     | 
    
         
             
                  # under JRuby.
         
     | 
| 
       723 
721 
     | 
    
         
             
                  #
         
     | 
| 
       724 
     | 
    
         
            -
                  # @return [Enumerator<Block>] enumerator of  
     | 
| 
      
 722 
     | 
    
         
            +
                  # @return [Enumerator<Block>] enumerator of {Block}s if no block given.
         
     | 
| 
      
 723 
     | 
    
         
            +
                  # @yield [block] Passes each {Block} in turn to a block
         
     | 
| 
       725 
724 
     | 
    
         
             
                  # @api public
         
     | 
| 
       726 
     | 
    
         
            -
                  def  
     | 
| 
       727 
     | 
    
         
            -
                    if  
     | 
| 
       728 
     | 
    
         
            -
                       
     | 
| 
      
 725 
     | 
    
         
            +
                  def each_block(&blk)
                    return enum_for(:each_block) unless block_given?

                    # On JRuby, the mere presence of a :threads option selects the
                    # parallel parser; otherwise blocks are parsed sequentially.
                    parallel = RUBY_PLATFORM == 'java' && @opts.has_key?(:threads)
                    source = method(parallel ? :parse_blocks_parallel : :each_block_seq)
                    wrap_block_seq(source, &blk)
                  end
         
     | 
| 
      
 737 
     | 
    
         
            +
                  alias_method :parse_blocks, :each_block
         
     | 
| 
      
 738 
     | 
    
         
            +
             
     | 
| 
      
 739 
     | 
    
         
            +
                  # Parse blocks one after another until `at_end` is true, yielding
                  # each non-nil result of `_parse_block`.
                  #
                  # @yield [block] each parsed block
                  def each_block_seq
                    until at_end
                      parsed = _parse_block
                      next unless parsed
                      yield parsed
                    end
                  end
         
     | 
| 
      
 745 
     | 
    
         
            +
             
     | 
| 
      
 746 
     | 
    
         
            +
                  # Parse a single block and pass it through the configured
                  # wrappers (see #wrap_block_seq).
                  #
                  # @return the last value the wrapper chain yielded, or nil if
                  #   nothing was yielded (for instance at end of input).
                  def parse_block
                    result = nil
                    source = lambda do |&sink|
                      parsed = _parse_block
                      # _parse_block returns nil at end of input. Forwarding that
                      # nil would make the wrappers call block methods on nil, so
                      # skip it, as each_block_seq does.
                      sink.call(parsed) if parsed
                    end
                    wrap_block_seq(source) { |block| result = block }
                    result
                  end
         
     | 
| 
      
 753 
     | 
    
         
            +
             
     | 
| 
      
 754 
     | 
    
         
            +
                  # Option keys that enable a wrapper stage, listed in the order
                  # wrap_block_seq hands them to _wrap. Frozen so the shared list
                  # cannot be mutated by accident.
                  WRAP_OPTS = [:as_bio_alignment, :join_blocks, :remove_gaps].freeze
         
     | 
| 
      
 755 
     | 
    
         
            +
             
     | 
| 
      
 756 
     | 
    
         
            +
                  # Work out which wrapper stages are enabled and run `fun`
                  # through them via _wrap.
                  #
                  # Entries of WRAP_OPTS that are truthy in @opts are kept in
                  # WRAP_OPTS order; :sequence_filter is appended last when a
                  # non-empty sequence filter is set.
                  #
                  # @param fun a callable that yields blocks to the block it is given
                  # @yield each value produced by the wrapper chain
                  def wrap_block_seq(fun, &blk)
                    enabled = WRAP_OPTS.select { |name| @opts[name] }
                    filter = sequence_filter
                    enabled.push(:sequence_filter) if filter && !filter.empty?
                    _wrap(enabled, fun, &blk)
                  end
         
     | 
| 
      
 761 
     | 
    
         
            +
             
     | 
| 
      
 762 
     | 
    
         
            +
                  # options should be [:outer, ..., :inner]
         
     | 
| 
      
 763 
     | 
    
         
            +
                  def _wrap(options, fun, &blk)
                    # Consume the outermost remaining mode (this mutates `options`).
                    # nil means nothing is left to wrap, so the source is called
                    # directly.
                    mode = options.shift
                    return fun.call(&blk) if mode.nil?

                    case mode
                    when :sequence_filter
                      # Pass on only blocks that have more than one sequence.
                      keep_multi = lambda { |b| b if b.sequences.size > 1 }
                      conv_map(options, fun, keep_multi, &blk)
                    when :join_blocks
                      block_joiner(options, fun, &blk)
                    when :as_bio_alignment
                      conv_send(options, fun, :to_bio_alignment, &blk)
                    when :remove_gaps
                      # Gaps are removed in place, and only from filtered blocks.
                      degap = lambda do |b|
                        b.remove_gaps! if b.filtered?
                        b
                      end
                      conv_map(options, fun, degap, &blk)
                    else
                      raise "unhandled wrapper mode: #{mode}"
                    end
                  end
         
     | 
| 
      
 789 
     | 
    
         
            +
             
     | 
| 
      
 790 
     | 
    
         
            +
                  # Call `fun` and yield only the blocks that are filtered and have
                  # more than one sequence.
                  #
                  # @param fun a callable that yields blocks to the block it is given
                  # @yield [block] each block that passes both checks
                  def filter_seq_count(fun)
                    fun.call do |candidate|
                      next unless candidate.filtered?
                      next unless candidate.sequences.size > 1
                      yield candidate
                    end
                  end
         
     | 
| 
      
 795 
     | 
    
         
            +
             
     | 
| 
      
 796 
     | 
    
         
            +
                  # Merge runs of adjacent blocks coming out of the inner wrappers.
                  #
                  # A block is held back until its successor is seen. The two are
                  # joined when at least one of them is filtered and the held block
                  # reports `joinable_with?` the new one; otherwise the held block
                  # is yielded and the new one is held instead. The last held block
                  # is yielded after the source is exhausted.
                  #
                  # @param options remaining wrapper modes, passed on to _wrap
                  # @param fun a callable that yields blocks to the block it is given
                  # @yield [block] each joined or stand-alone block
                  def block_joiner(options, fun)
                    pending = nil
                    _wrap(options, fun) do |cur|
                      mergeable = pending &&
                                  (pending.filtered? || cur.filtered?) &&
                                  pending.joinable_with?(cur)
                      if mergeable
                        pending = pending.join(cur)
                      else
                        yield pending if pending
                        pending = cur
                      end
                    end
                    yield pending if pending
                  end
         
     | 
| 
      
 809 
     | 
    
         
            +
             
     | 
| 
      
 810 
     | 
    
         
            +
                  # Transforms each block from a source with a callable.
                  #
                  # Blocks are obtained through +_wrap(options, search)+; each one is
                  # passed to +fun.call+, and the result is yielded to the caller
                  # unless it is +nil+ or +false+.
                  #
                  # @param options [Object] passed through to +_wrap+ unchanged
                  # @param search [Object] block source, passed through to +_wrap+
                  # @param fun [#call] conversion applied to every block
                  # @yield [value] each truthy conversion result
                  # @api private
                  def conv_map(options, search, fun)
                    _wrap(options, search) do |item|
                      converted = fun.call(item)
                      # Falsy results mean "drop this block".
                      next unless converted

                      yield converted
                    end
                  end
         
     | 
| 
      
 816 
     | 
    
         
            +
             
     | 
| 
      
 817 
     | 
    
         
            +
                  # Transforms each block from a source by sending it a message.
                  #
                  # Blocks are obtained through +_wrap(options, search)+; +sym+ is sent
                  # to each one with +send+, and the result is yielded to the caller
                  # unless it is +nil+ or +false+.
                  #
                  # @param options [Object] passed through to +_wrap+ unchanged
                  # @param search [Object] block source, passed through to +_wrap+
                  # @param sym [Symbol, String] name of the method to invoke on every block
                  # @yield [value] each truthy method result
                  # @api private
                  def conv_send(options, search, sym)
                    _wrap(options, search) do |item|
                      outcome = item.send(sym)
                      # Falsy results mean "drop this block".
                      next unless outcome

                      yield outcome
                    end
                  end
         
     | 
| 
       737 
823 
     | 
    
         | 
| 
       738 
824 
     | 
    
         
             
                  # Parse alignment blocks with a worker thread.
         
     | 
| 
       739 
825 
     | 
    
         
             
                  #
         
     | 
| 
       740 
     | 
    
         
            -
                  # @ 
     | 
| 
      
 826 
     | 
    
         
            +
                  # @block block handler
         
     | 
| 
       741 
827 
     | 
    
         
             
                  # @api private
         
     | 
| 
       742 
828 
     | 
    
         
             
                  def parse_blocks_parallel
         
     | 
| 
       743 
829 
     | 
    
         
             
                    queue = java.util.concurrent.LinkedBlockingQueue.new(128)
         
     | 
| 
       744 
830 
     | 
    
         
             
                    worker = Thread.new do
         
     | 
| 
       745 
831 
     | 
    
         
             
                      begin
         
     | 
| 
       746 
832 
     | 
    
         
             
                        until at_end
         
     | 
| 
       747 
     | 
    
         
            -
                           
     | 
| 
      
 833 
     | 
    
         
            +
                          block = _parse_block()
         
     | 
| 
      
 834 
     | 
    
         
            +
                          queue.put(block) if block
         
     | 
| 
       748 
835 
     | 
    
         
             
                        end
         
     | 
| 
       749 
836 
     | 
    
         
             
                        queue.put(:eof)
         
     | 
| 
       750 
837 
     | 
    
         
             
                      rescue
         
     | 
| 
         @@ -752,31 +839,23 @@ module Bio 
     | 
|
| 
       752 
839 
     | 
    
         
             
                        $stderr.puts $!.backtrace.join("\n")
         
     | 
| 
       753 
840 
     | 
    
         
             
                      end
         
     | 
| 
       754 
841 
     | 
    
         
             
                    end
         
     | 
| 
       755 
     | 
    
         
            -
                     
     | 
| 
       756 
     | 
    
         
            -
             
     | 
| 
       757 
     | 
    
         
            -
             
     | 
| 
       758 
     | 
    
         
            -
                       
     | 
| 
       759 
     | 
    
         
            -
             
     | 
| 
       760 
     | 
    
         
            -
                         
     | 
| 
       761 
     | 
    
         
            -
             
     | 
| 
       762 
     | 
    
         
            -
             
     | 
| 
       763 
     | 
    
         
            -
                         
     | 
| 
       764 
     | 
    
         
            -
             
     | 
| 
       765 
     | 
    
         
            -
                         
     | 
| 
       766 
     | 
    
         
            -
             
     | 
| 
       767 
     | 
    
         
            -
                          n_final_poll += 1 unless worker.alive?
         
     | 
| 
       768 
     | 
    
         
            -
                        end
         
     | 
| 
       769 
     | 
    
         
            -
                        break if n_final_poll > 1
         
     | 
| 
       770 
     | 
    
         
            -
                      end
         
     | 
| 
       771 
     | 
    
         
            -
                      unless saw_eof
         
     | 
| 
       772 
     | 
    
         
            -
                        raise "worker exited unexpectedly!"
         
     | 
| 
      
 842 
     | 
    
         
            +
                    saw_eof = false
         
     | 
| 
      
 843 
     | 
    
         
            +
                    n_final_poll = 0
         
     | 
| 
      
 844 
     | 
    
         
            +
                    while true
         
     | 
| 
      
 845 
     | 
    
         
            +
                      block = queue.poll(1, java.util.concurrent.TimeUnit::SECONDS)
         
     | 
| 
      
 846 
     | 
    
         
            +
                      if block == :eof
         
     | 
| 
      
 847 
     | 
    
         
            +
                        saw_eof = true
         
     | 
| 
      
 848 
     | 
    
         
            +
                        break
         
     | 
| 
      
 849 
     | 
    
         
            +
                      elsif block
         
     | 
| 
      
 850 
     | 
    
         
            +
                        yield block
         
     | 
| 
      
 851 
     | 
    
         
            +
                      else
         
     | 
| 
      
 852 
     | 
    
         
            +
                        # timed out
         
     | 
| 
      
 853 
     | 
    
         
            +
                        n_final_poll += 1 unless worker.alive?
         
     | 
| 
       773 
854 
     | 
    
         
             
                      end
         
     | 
| 
      
 855 
     | 
    
         
            +
                      break if n_final_poll > 1
         
     | 
| 
       774 
856 
     | 
    
         
             
                    end
         
     | 
| 
       775 
     | 
    
         
            -
             
     | 
| 
       776 
     | 
    
         
            -
             
     | 
| 
       777 
     | 
    
         
            -
                  def each_block
         
     | 
| 
       778 
     | 
    
         
            -
                    until at_end
         
     | 
| 
       779 
     | 
    
         
            -
                      yield parse_block()
         
     | 
| 
      
 857 
     | 
    
         
            +
                    unless saw_eof
         
     | 
| 
      
 858 
     | 
    
         
            +
                      raise "worker exited unexpectedly!"
         
     | 
| 
       780 
859 
     | 
    
         
             
                    end
         
     | 
| 
       781 
860 
     | 
    
         
             
                  end
         
     | 
| 
       782 
861 
     | 
    
         |