bio-maf 1.0.0-java → 1.0.1-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/maf_bgzip +140 -12
- data/bin/maf_extract +50 -40
- data/bin/maf_index +11 -2
- data/bin/maf_tile +143 -46
- data/bio-maf.gemspec +3 -3
- data/features/bgzf.feature +45 -0
- data/features/maf-indexing.feature +6 -0
- data/features/maf-parsing.feature +17 -0
- data/features/maf-querying.feature +11 -0
- data/features/slice.feature +11 -0
- data/features/step_definitions/parse_steps.rb +1 -0
- data/features/tiling.feature +23 -5
- data/lib/bio-maf.rb +5 -1
- data/lib/bio/maf.rb +1 -0
- data/lib/bio/maf/index.rb +158 -68
- data/lib/bio/maf/jobs.rb +168 -0
- data/lib/bio/maf/maf.rb +24 -1
- data/lib/bio/maf/parser.rb +90 -35
- data/lib/bio/maf/struct.rb +4 -0
- data/lib/bio/maf/tiler.rb +30 -3
- data/lib/bio/ucsc/ucsc_bin.rb +14 -1
- data/man/maf_bgzip.1 +27 -0
- data/man/maf_bgzip.1.ronn +32 -0
- data/spec/bio/maf/index_spec.rb +3 -1
- data/spec/bio/maf/parser_spec.rb +6 -2
- data/spec/bio/ucsc/ucsc_bin_spec.rb +18 -0
- data/test/data/empty.maf +2 -0
- data/test/data/ext-bin.maf +22 -0
- data/test/data/gap-1.kct +0 -0
- data/test/data/mm8_chr7_tiny.kct +0 -0
- data/test/data/mm8_chrM_tiny.kct +0 -0
- metadata +380 -184
    
        data/lib/bio/maf/jobs.rb
    ADDED
    
    | @@ -0,0 +1,168 @@ | |
| 1 | 
            +
            require 'set'
         | 
| 2 | 
            +
            require 'java' if RUBY_PLATFORM == 'java'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            module Bio::MAF
         | 
| 5 | 
            +
              
         | 
| 6 | 
            +
              # Factory for the platform-appropriate parallel job runner.
              module JobRunner
                # Build a runner able to execute up to +n_parallel+ jobs at once.
                # Under JRuby (RUBY_PLATFORM == 'java') a JThreadRunner is used;
                # on every other platform a ForkRunner is used.
                #
                # @param [Integer] n_parallel maximum number of concurrent jobs.
                # @return [JThreadRunner, ForkRunner]
                def self.create(n_parallel)
                  runner_class = RUBY_PLATFORM == 'java' ? JThreadRunner : ForkRunner
                  runner_class.new(n_parallel)
                end
              end
         | 
| 15 | 
            +
             | 
| 16 | 
            +
              # Runs queued jobs in forked child processes, keeping at most
              # +n_parallel+ children alive at any one time.
              class ForkRunner

                # @param [Integer] n_parallel maximum number of concurrent children.
                def initialize(n_parallel)
                  @n_parallel = n_parallel
                  @jobs = []
                  @kids = Set.new
                end

                # Queue a job. Nothing is started until {#run} is called.
                #
                # @yield the work to perform inside a child process.
                def add(&proc)
                  @jobs << proc
                end

                # Start queued jobs, up to the parallelism limit, and wait
                # until every job has been started and every child reaped.
                #
                # @raise [RuntimeError] if a child exits unsuccessfully, or if
                #   a process that this runner did not start is reaped.
                def run
                  until @jobs.empty? && @kids.empty?
                    start_job while can_start?
                    await
                  end
                end

                private

                # Whether a job is waiting and a child slot is free.
                def can_start?
                  !@jobs.empty? && @kids.size < @n_parallel
                end

                # Fork a child for the next queued job. The parent records the
                # child's pid; the child runs the job and exits with status 0
                # on success or 1 if the job raised.
                def start_job
                  job = @jobs.shift
                  pid = fork
                  if pid
                    # parent
                    @kids << pid
                  else
                    # child
                    begin
                      job.call
                      exit 0
                    rescue SystemExit
                      # Let explicit exit calls (including the one above)
                      # terminate the child with their own status.
                      raise
                    rescue Exception
                      # Deliberately broad: any failure in the child must be
                      # turned into a non-zero exit status for the parent.
                      # LOG is expected to be defined by the enclosing module.
                      LOG.error $!
                      exit 1
                    end
                  end
                end

                # Wait for any child to finish and verify that it was one of
                # ours and that it succeeded.
                def await
                  # wait2 returns the status directly, so the failure message
                  # does not depend on an undefined local variable.
                  pid, status = Process.wait2
                  unless @kids.delete?(pid)
                    raise "Completion of unexpected job #{pid}!"
                  end
                  unless status.success?
                    # exitstatus is nil when the child was killed by a signal.
                    code = status.exitstatus || "signal #{status.termsig}"
                    raise "Job #{pid} failed with status #{code}!"
                  end
                end

              end
         | 
| 74 | 
            +
             | 
| 75 | 
            +
              # Runs jobs on a fixed-size Java thread pool (JRuby only),
              # collecting results through an ExecutorCompletionService.
              class JThreadRunner

                # @param [Integer] n_parallel number of threads in the pool.
                def initialize(n_parallel)
                  @n_parallel = n_parallel
                  @exec = java.util.concurrent.Executors.newFixedThreadPool(n_parallel)
                  @ecs = java.util.concurrent.ExecutorCompletionService.new(@exec)
                  @n = 0
                end

                # Submit a job to the completion service right away.
                #
                # @yield the work to perform on a pool thread.
                # @return [Integer] number of jobs submitted so far.
                def add(&blk)
                  @ecs.submit(&blk)
                  @n += 1
                end

                # Wait for every submitted job to complete, then shut the
                # pool down. If retrieving a job's result raises, the error
                # is logged, the pool is shut down immediately and the
                # exception is re-raised.
                def run
                  finished = 0
                  while finished != @n
                    await_next
                    finished += 1
                  end
                  @exec.shutdown
                end

                private

                # Take the next completed future and retrieve its result.
                # Only errors from retrieving the result are handled here;
                # errors from taking the future propagate untouched.
                def await_next
                  future = @ecs.take
                  begin
                    future.get
                  rescue Exception => err
                    LOG.error err
                    @exec.shutdownNow
                    raise
                  end
                end

              end
         | 
| 106 | 
            +
             | 
| 107 | 
            +
              # Factory for the platform-appropriate background executor.
              module Executor
                # Build an executor: a JExecutor under JRuby
                # (RUBY_PLATFORM == 'java'), otherwise a DummyExecutor.
                #
                # @return [JExecutor, DummyExecutor]
                def self.create
                  RUBY_PLATFORM == 'java' ? JExecutor.new : DummyExecutor.new
                end
              end
         | 
| 116 | 
            +
             | 
| 117 | 
            +
              # Executor backed by a Java ThreadPoolExecutor (JRuby only).
              # The pool is built with core and maximum size 1, a queue bounded
              # at 8 entries and a CallerRunsPolicy rejection handler.
              class JExecutor

                def initialize
                  backlog = java.util.concurrent.LinkedBlockingQueue.new(8)
                  overflow = java.util.concurrent.ThreadPoolExecutor::CallerRunsPolicy.new
                  @exec = java.util.concurrent.ThreadPoolExecutor.new(
                    1, 1, 1, java.util.concurrent.TimeUnit::MINUTES, backlog, overflow)
                  @ecs = java.util.concurrent.ExecutorCompletionService.new(@exec)
                  @submitted = 0
                  @completed = 0
                end

                # Submit a task, then surface errors from any tasks that have
                # already completed.
                #
                # @yield the work to perform.
                def submit(&blk)
                  @ecs.submit(&blk)
                  @submitted += 1
                  check_for_errors
                end

                # Drain every already-completed task without blocking.
                # Retrieving a result re-raises whatever the task raised.
                def check_for_errors
                  while (done = @ecs.poll)
                    reap(done)
                  end
                end

                # Stop accepting work and block until every submitted task
                # has been collected.
                def shutdown
                  @exec.shutdown
                  reap(@ecs.take) while @completed != @submitted
                end

                private

                # Retrieve one future's result and count it as completed.
                # The counter is left untouched if retrieval raises.
                def reap(future)
                  future.get
                  @completed += 1
                end
              end
         | 
| 153 | 
            +
             | 
| 154 | 
            +
              # Executor that runs each task synchronously in the calling
              # thread.
              class DummyExecutor

                def initialize; end

                # Run the given block immediately.
                #
                # @yield the work to perform.
                # @return whatever the block returns.
                def submit
                  yield
                end

                # Nothing to clean up: every task has already run by the
                # time {#submit} returns.
                def shutdown; end

              end
         | 
| 167 | 
            +
             | 
| 168 | 
            +
            end
         | 
    
        data/lib/bio/maf/maf.rb
    CHANGED
    
    | @@ -58,10 +58,14 @@ module Bio | |
| 58 58 | 
             
                  attr_reader :sequences
         | 
| 59 59 | 
             
                  # Offset of the alignment block within the MAF file, in bytes.
         | 
| 60 60 | 
             
                  # @return [Integer]
         | 
| 61 | 
            -
                   | 
| 61 | 
            +
                  attr_accessor :offset
         | 
| 62 62 | 
             
                  # Size of the alignment block within the MAF file, in bytes.
         | 
| 63 63 | 
             
                  # @return [Integer]
         | 
| 64 64 | 
             
                  attr_reader :size
         | 
| 65 | 
            +
                  # Original text of the MAF block. Only available if the
         | 
| 66 | 
            +
                  # :retain_text parser option is set.
         | 
| 67 | 
            +
                  # @return [String]
         | 
| 68 | 
            +
                  attr_accessor :orig_text
         | 
| 65 69 |  | 
| 66 70 | 
             
                  def initialize(vars, sequences, offset, size, filtered)
         | 
| 67 71 | 
             
                    @vars = vars
         | 
| @@ -90,6 +94,10 @@ module Bio | |
| 90 94 | 
             
                    sequences.first.text.size
         | 
| 91 95 | 
             
                  end
         | 
| 92 96 |  | 
| 97 | 
            +
                  # Call +upcase!+ on every sequence of this block, in place.
                  #
                  # @return the collection returned by +sequences+.
                  def upcase!
                    sequences.each(&:upcase!)
                  end
         | 
| 100 | 
            +
             | 
| 93 101 | 
             
                  # Whether this block has been modified by a parser filter.
         | 
| 94 102 | 
             
                  # @return [Boolean]
         | 
| 95 103 | 
             
                  def filtered?
         | 
| @@ -101,6 +109,13 @@ module Bio | |
| 101 109 | 
             
                    Bio::BioAlignment::Alignment.new(ba_seq)
         | 
| 102 110 | 
             
                  end
         | 
| 103 111 |  | 
| 112 | 
            +
                  # Render this block as text by writing it through a Writer
                  # into an in-memory buffer.
                  #
                  # @return [String] the text produced by Writer#write_block.
                  def to_s
                    out = StringIO.new
                    Writer.new(out).write_block(self)
                    out.string
                  end
         | 
| 118 | 
            +
             | 
| 104 119 | 
             
                  GAP = /-+/
         | 
| 105 120 |  | 
| 106 121 | 
             
                  # Find gaps present in all sequences. These would generally
         | 
| @@ -356,6 +371,10 @@ module Bio | |
| 356 371 | 
             
                    end
         | 
| 357 372 | 
             
                  end
         | 
| 358 373 |  | 
| 374 | 
            +
                  # Fold this sequence's text to upper case in place.
                  #
                  # @return the result of +text.upcase!+; for a String this
                  #   is nil when the text was already upper case.
                  def upcase!
                    text.upcase!
                  end
         | 
| 377 | 
            +
             | 
| 359 378 | 
             
                  def to_bio_alignment
         | 
| 360 379 | 
             
                    Bio::BioAlignment::Sequence.new(source, text)
         | 
| 361 380 | 
             
                  end
         | 
| @@ -471,6 +490,10 @@ module Bio | |
| 471 490 | 
             
                    true
         | 
| 472 491 | 
             
                  end
         | 
| 473 492 |  | 
| 493 | 
            +
                  # Intentionally does nothing, so that callers can send
                  # +upcase!+ to this object as well.
                  #
                  # @return [nil]
                  def upcase!
                    # no-op
                  end
         | 
| 496 | 
            +
             | 
| 474 497 | 
             
                  def write_fasta(writer)
         | 
| 475 498 | 
             
                    raise "empty sequence output not implemented!"
         | 
| 476 499 | 
             
                  end
         | 
    
        data/lib/bio/maf/parser.rb
    CHANGED
    
    | @@ -1,4 +1,5 @@ | |
| 1 1 | 
             
            require 'strscan'
         | 
| 2 | 
            +
            require 'zlib'
         | 
| 2 3 | 
             
            require 'java' if RUBY_PLATFORM == 'java'
         | 
| 3 4 | 
             
            require 'bio-bgzf'
         | 
| 4 5 |  | 
| @@ -104,6 +105,7 @@ module Bio | |
| 104 105 |  | 
| 105 106 | 
             
                  # Spawn a read-ahead thread. Called from {#initialize}.
                  # The thread runs +read_ahead+ and is kept in @read_thread.
                  #
                  # @return [Thread] the newly started thread.
                  def start_read_ahead
                    LOG.debug { "Starting read-ahead thread." }
                    @read_thread = Thread.new do
                      read_ahead
                    end
                  end
         | 
| 109 111 |  | 
| @@ -169,6 +171,7 @@ module Bio | |
| 169 171 | 
             
                  BLOCK_START = /^(?=a)/
         | 
| 170 172 | 
             
                  BLOCK_START_OR_EOS = /(?:^(?=a))|\z/
         | 
| 171 173 | 
             
                  EOL_OR_EOF = /\n|\z/
         | 
| 174 | 
            +
                  JRUBY_P = (RUBY_PLATFORM == 'java')
         | 
| 172 175 |  | 
| 173 176 | 
             
                  def set_last_block_pos!
         | 
| 174 177 | 
             
                    @last_block_pos = s.string.rindex(BLOCK_START)
         | 
| @@ -333,14 +336,22 @@ module Bio | |
| 333 336 | 
             
                      elsif [I, E, Q, COMMENT, nil].include? first
         | 
| 334 337 | 
             
                        next
         | 
| 335 338 | 
             
                      else
         | 
| 336 | 
            -
                         | 
| 339 | 
            +
                        if opts[:strict]
         | 
| 340 | 
            +
                          parse_error "unexpected line: '#{line}'"
         | 
| 341 | 
            +
                        else
         | 
| 342 | 
            +
                          LOG.warn "Ignoring invalid MAF line: '#{line}'"
         | 
| 343 | 
            +
                        end
         | 
| 337 344 | 
             
                      end
         | 
| 338 345 | 
             
                    end
         | 
| 339 | 
            -
                    Block.new(block_vars,
         | 
| 340 | 
            -
             | 
| 341 | 
            -
             | 
| 342 | 
            -
             | 
| 343 | 
            -
             | 
| 346 | 
            +
                    b = Block.new(block_vars,
         | 
| 347 | 
            +
                                  seqs,
         | 
| 348 | 
            +
                                  block_offset,
         | 
| 349 | 
            +
                                  s.pos - block_start_pos,
         | 
| 350 | 
            +
                                  filtered)
         | 
| 351 | 
            +
                    if opts[:retain_text]
         | 
| 352 | 
            +
                      b.orig_text = s.string.slice(block_start_pos...(s.pos))
         | 
| 353 | 
            +
                    end
         | 
| 354 | 
            +
                    return b
         | 
| 344 355 | 
             
                  end
         | 
| 345 356 |  | 
| 346 357 | 
             
                  # Parse an 's' line.
         | 
| @@ -504,12 +515,16 @@ module Bio | |
| 504 515 | 
             
                #  * `:parse_extended`: whether to parse 'i' and 'q' lines
         | 
| 505 516 | 
             
                #  * `:parse_empty`: whether to parse 'e' lines
         | 
| 506 517 | 
             
                #  * `:remove_gaps`: remove gaps left after filtering sequences
         | 
| 518 | 
            +
                #  * `:join_blocks`: join blocks where possible
         | 
| 519 | 
            +
                #  * `:upcase`: fold sequence data to upper case
         | 
| 507 520 | 
             
                #  * `:chunk_size`: read MAF file in chunks of this many bytes
         | 
| 508 521 | 
             
                #  * `:random_chunk_size`: as above, but for random access ({#fetch_blocks})
         | 
| 509 522 | 
             
                #  * `:merge_max`: merge up to this many bytes of blocks for
         | 
| 510 523 | 
             
                #    random access
         | 
| 511 524 | 
             
                #  * `:threads`: number of threads to use for parallel
         | 
| 512 525 | 
             
                #    parsing. Only useful under JRuby.
         | 
| 526 | 
            +
                #  * `:strict`: abort on un-parseable lines instead of continuing with
         | 
| 527 | 
            +
                #    a warning.
         | 
| 513 528 | 
             
                # @api public
         | 
| 514 529 |  | 
| 515 530 | 
             
                class Parser
         | 
| @@ -519,8 +534,12 @@ module Bio | |
| 519 534 | 
             
                  attr_reader :header
         | 
| 520 535 | 
             
                  # @return [String] path of MAF file being parsed.
         | 
| 521 536 | 
             
                  attr_reader :file_spec
         | 
| 522 | 
            -
                  # @return [ | 
| 537 | 
            +
                  # @return [IO] file handle for MAF file.
         | 
| 523 538 | 
             
                  attr_reader :f
         | 
| 539 | 
            +
                  # May be gzip-compressed.
         | 
| 540 | 
            +
                  # @return [IO] file handle for physical MAF file.
         | 
| 541 | 
            +
                  # @api private
         | 
| 542 | 
            +
                  attr_reader :phys_f
         | 
| 524 543 | 
             
                  # @return [StringScanner] scanner for parsing.
         | 
| 525 544 | 
             
                  attr_reader :s
         | 
| 526 545 | 
             
                  # @return [ChunkReader] ChunkReader.
         | 
| @@ -547,33 +566,47 @@ module Bio | |
| 547 566 | 
             
                  RANDOM_CHUNK_SIZE = 4096
         | 
| 548 567 | 
             
                  MERGE_MAX = SEQ_CHUNK_SIZE
         | 
| 549 568 |  | 
| 569 | 
            +
                  DEFAULT_OPTS = {
         | 
| 570 | 
            +
                    :chunk_size => SEQ_CHUNK_SIZE,
         | 
| 571 | 
            +
                    :random_chunk_size => RANDOM_CHUNK_SIZE,
         | 
| 572 | 
            +
                    :merge_max => MERGE_MAX,
         | 
| 573 | 
            +
                    :parse_extended => false,
         | 
| 574 | 
            +
                    :parse_empty => false,
         | 
| 575 | 
            +
                    :readahead_thread => true,
         | 
| 576 | 
            +
                    :seq_parse_thread => true
         | 
| 577 | 
            +
                  }
         | 
| 578 | 
            +
                  if JRUBY_P
         | 
| 579 | 
            +
                    DEFAULT_OPTS[:threads] = java.lang.Runtime.runtime.availableProcessors
         | 
| 580 | 
            +
                  end
         | 
| 581 | 
            +
             | 
| 550 582 | 
             
                  # Create a new parser instance.
         | 
| 551 583 | 
             
                  #
         | 
| 552 584 | 
             
                  # @param [String] file_spec path of file to parse.
         | 
| 553 | 
            -
                  # @param [Hash]  | 
| 585 | 
            +
                  # @param [Hash] parse_opts parser options.
         | 
| 554 586 | 
             
                  # @api public
         | 
| 555 | 
            -
                  def initialize(file_spec,  | 
| 587 | 
            +
                  def initialize(file_spec, parse_opts={})
         | 
| 588 | 
            +
                    opts = DEFAULT_OPTS.merge(parse_opts)
         | 
| 556 589 | 
             
                    @opts = opts
         | 
| 557 | 
            -
                     | 
| 558 | 
            -
             | 
| 559 | 
            -
                     | 
| 560 | 
            -
                     | 
| 561 | 
            -
                    @random_access_chunk_size = opts[:random_chunk_size] || RANDOM_CHUNK_SIZE
         | 
| 562 | 
            -
                    @merge_max = opts[:merge_max] || MERGE_MAX
         | 
| 563 | 
            -
                    @parse_extended = opts[:parse_extended] || false
         | 
| 564 | 
            -
                    @parse_empty = opts[:parse_empty] || false
         | 
| 590 | 
            +
                    @random_access_chunk_size = opts[:random_chunk_size]
         | 
| 591 | 
            +
                    @merge_max = opts[:merge_max]
         | 
| 592 | 
            +
                    @parse_extended = opts[:parse_extended]
         | 
| 593 | 
            +
                    @parse_empty = opts[:parse_empty]
         | 
| 565 594 | 
             
                    @chunk_start = 0
         | 
| 566 595 | 
             
                    if file_spec.respond_to? :flush
         | 
| 596 | 
            +
                      # an IO object
         | 
| 567 597 | 
             
                      # guess what, Pathnames respond to :read...
         | 
| 568 598 | 
             
                      @f = file_spec
         | 
| 569 599 | 
             
                      @file_spec = @f.path if @f.respond_to?(:path)
         | 
| 570 | 
            -
                      # TODO: gzip?
         | 
| 600 | 
            +
                      # TODO: test for gzip?
         | 
| 571 601 | 
             
                    else
         | 
| 602 | 
            +
                      # a pathname (or Pathname)
         | 
| 572 603 | 
             
                      @file_spec = file_spec
         | 
| 604 | 
            +
                      @phys_f = File.open(file_spec)
         | 
| 573 605 | 
             
                      if file_spec.to_s.end_with?(".maf.gz")
         | 
| 574 | 
            -
                        @f =  | 
| 606 | 
            +
                        @f = Zlib::GzipReader.new(@phys_f)
         | 
| 607 | 
            +
                        @compression = :gzip
         | 
| 575 608 | 
             
                      else
         | 
| 576 | 
            -
                        @f =  | 
| 609 | 
            +
                        @f = @phys_f
         | 
| 577 610 | 
             
                      end
         | 
| 578 611 | 
             
                    end
         | 
| 579 612 | 
             
                    if @file_spec.to_s =~ /\.bgzf?$/
         | 
| @@ -582,8 +615,9 @@ module Bio | |
| 582 615 | 
             
                    else
         | 
| 583 616 | 
             
                      @base_reader = ChunkReader
         | 
| 584 617 | 
             
                    end
         | 
| 585 | 
            -
                    @cr = base_reader.new(@f, chunk_size)
         | 
| 586 | 
            -
                    if  | 
| 618 | 
            +
                    @cr = base_reader.new(@f, opts[:chunk_size])
         | 
| 619 | 
            +
                    if JRUBY_P && opts[:readahead_thread]
         | 
| 620 | 
            +
                      LOG.debug "Using ThreadedChunkReaderWrapper."
         | 
| 587 621 | 
             
                      @cr = ThreadedChunkReaderWrapper.new(@cr)
         | 
| 588 622 | 
             
                    end
         | 
| 589 623 | 
             
                    @s = StringScanner.new(cr.read_chunk())
         | 
| @@ -649,7 +683,7 @@ module Bio | |
| 649 683 | 
             
                  def fetch_blocks(fetch_list, &blk)
         | 
| 650 684 | 
             
                    if blk
         | 
| 651 685 | 
             
                      merged = merge_fetch_list(fetch_list)
         | 
| 652 | 
            -
                      if  | 
| 686 | 
            +
                      if JRUBY_P && @opts.fetch(:threads, 1) > 1
         | 
| 653 687 | 
             
                        fun = lambda { |&b2| fetch_blocks_merged_parallel(merged, &b2) }
         | 
| 654 688 | 
             
                      else
         | 
| 655 689 | 
             
                        fun = lambda { |&b2| fetch_blocks_merged(merged, &b2) }
         | 
| @@ -667,15 +701,17 @@ module Bio | |
| 667 701 | 
             
                  def fetch_blocks_merged(fetch_list, &blk)
                    # Fetches every entry of fetch_list through a single
                    # context and logs timing. Each entry is splatted into
                    # ctx.fetch_blocks; its second element is summed as a
                    # size (presumably bytes, given the MB/s figure below).
                    start = Time.now
                    # Seed the sum with 0 so that an empty fetch list gives 0
                    # rather than nil, which would break the rate arithmetic.
                    total_size = fetch_list.collect { |e| e[1] }.reduce(0, :+)
                    # NOTE: this counts fetch-list entries handled, although
                    # the log message below labels them as blocks.
                    count = 0
                    with_context(@random_access_chunk_size) do |ctx|
                      fetch_list.each do |e|
                        ctx.fetch_blocks(*e, &blk)
                        count += 1
                      end
                    end
                    elapsed = Time.now - start
                    # Guard against a zero interval so the rate is never NaN
                    # or Infinity.
                    rate = elapsed > 0 ? (total_size / 1048576.0) / elapsed : 0.0
                    LOG.debug { sprintf("Fetched %d blocks in %.3fs, %.1f MB/s.",
                                        count, elapsed, rate) }
                  end
         | 
| 680 716 |  | 
| 681 717 | 
             
                  # Fetch and parse the blocks given by the merged fetch list, in
         | 
| @@ -807,7 +843,9 @@ module Bio | |
| 807 843 | 
             
                      end
         | 
| 808 844 | 
             
                    end
         | 
| 809 845 | 
             
                    @header = Header.new(vars, align_params)
         | 
| 810 | 
            -
                    s.skip_until | 
| 846 | 
            +
                    if ! s.skip_until(BLOCK_START)
         | 
| 847 | 
            +
                      @at_end = true
         | 
| 848 | 
            +
                    end
         | 
| 811 849 | 
             
                  end
         | 
| 812 850 |  | 
| 813 851 | 
             
                  # Parse all alignment blocks until EOF.
         | 
| @@ -820,7 +858,7 @@ module Bio | |
| 820 858 | 
             
                  # @api public
         | 
| 821 859 | 
             
                  def each_block(&blk)
         | 
| 822 860 | 
             
                    if block_given?
         | 
| 823 | 
            -
                      if  | 
| 861 | 
            +
                      if JRUBY_P && opts[:seq_parse_thread]
         | 
| 824 862 | 
             
                        fun = method(:parse_blocks_parallel)
         | 
| 825 863 | 
             
                      else
         | 
| 826 864 | 
             
                        fun = method(:each_block_seq)
         | 
| @@ -847,11 +885,12 @@ module Bio | |
| 847 885 | 
             
                    b
         | 
| 848 886 | 
             
                  end
         | 
| 849 887 |  | 
| 850 | 
            -
                  WRAP_OPTS = [:as_bio_alignment, :join_blocks, :remove_gaps]
         | 
| 888 | 
            +
                  WRAP_OPTS = [:as_bio_alignment, :join_blocks, :remove_gaps, :upcase]
         | 
| 851 889 |  | 
| 852 890 | 
             
                  def wrap_block_seq(fun, &blk)
         | 
| 853 891 | 
             
                    opts = WRAP_OPTS.find_all { |o| @opts[o] }
         | 
| 854 892 | 
             
                    opts << :sequence_filter if sequence_filter && (! sequence_filter.empty?)
         | 
| 893 | 
            +
                    LOG.debug { "wrapping #{fun} with #{opts.inspect}" }
         | 
| 855 894 | 
             
                    _wrap(opts, fun, &blk)
         | 
| 856 895 | 
             
                  end
         | 
| 857 896 |  | 
| @@ -873,6 +912,12 @@ module Bio | |
| 873 912 | 
             
                                fun,
         | 
| 874 913 | 
             
                                :to_bio_alignment,
         | 
| 875 914 | 
             
                                &blk)
         | 
| 915 | 
            +
                    when :upcase
         | 
| 916 | 
            +
                      conv_send(options,
         | 
| 917 | 
            +
                                fun,
         | 
| 918 | 
            +
                                :upcase!,
         | 
| 919 | 
            +
                                true,
         | 
| 920 | 
            +
                                &blk)
         | 
| 876 921 | 
             
                    when :remove_gaps
         | 
| 877 922 | 
             
                      conv_map(options,
         | 
| 878 923 | 
             
                               fun,
         | 
| @@ -910,10 +955,14 @@ module Bio | |
| 910 955 | 
             
                    end
         | 
| 911 956 | 
             
                  end
         | 
| 912 957 |  | 
| 913 | 
            -
                  def conv_send(options, search, sym)
         | 
| 958 | 
            +
                  def conv_send(options, search, sym, always_yield_block=false)
         | 
| 914 959 | 
             
                    _wrap(options, search) do |block|
         | 
| 915 960 | 
             
                      v = block.send(sym)
         | 
| 916 | 
            -
                       | 
| 961 | 
            +
                      if always_yield_block
         | 
| 962 | 
            +
                        yield block
         | 
| 963 | 
            +
                      else
         | 
| 964 | 
            +
                        yield v if v
         | 
| 965 | 
            +
                      end
         | 
| 917 966 | 
             
                    end
         | 
| 918 967 | 
             
                  end
         | 
| 919 968 |  | 
| @@ -925,14 +974,17 @@ module Bio | |
| 925 974 | 
             
                    queue = java.util.concurrent.LinkedBlockingQueue.new(128)
         | 
| 926 975 | 
             
                    worker = Thread.new do
         | 
| 927 976 | 
             
                      begin
         | 
| 977 | 
            +
                        LOG.debug "Starting parse worker."
         | 
| 928 978 | 
             
                        until at_end
         | 
| 929 979 | 
             
                          block = _parse_block()
         | 
| 930 980 | 
             
                          queue.put(block) if block
         | 
| 931 981 | 
             
                        end
         | 
| 932 982 | 
             
                        queue.put(:eof)
         | 
| 933 | 
            -
             | 
| 934 | 
            -
             | 
| 983 | 
            +
                        LOG.debug { "Parse worker reached EOF." }
         | 
| 984 | 
            +
                      rescue Exception
         | 
| 935 985 | 
             
                        LOG.error $!
         | 
| 986 | 
            +
                        Thread.current[:exception] = $!
         | 
| 987 | 
            +
                        raise
         | 
| 936 988 | 
             
                      end
         | 
| 937 989 | 
             
                    end
         | 
| 938 990 | 
             
                    saw_eof = false
         | 
| @@ -946,12 +998,15 @@ module Bio | |
| 946 998 | 
             
                        yield block
         | 
| 947 999 | 
             
                      else
         | 
| 948 1000 | 
             
                        # timed out
         | 
| 949 | 
            -
                         | 
| 1001 | 
            +
                        unless worker.alive?
         | 
| 1002 | 
            +
                          LOG.debug "Worker has exited."
         | 
| 1003 | 
            +
                          n_final_poll += 1
         | 
| 1004 | 
            +
                        end
         | 
| 950 1005 | 
             
                      end
         | 
| 951 1006 | 
             
                      break if n_final_poll > 1
         | 
| 952 1007 | 
             
                    end
         | 
| 953 1008 | 
             
                    unless saw_eof
         | 
| 954 | 
            -
                      raise "worker exited unexpectedly!"
         | 
| 1009 | 
            +
                      raise "worker exited unexpectedly from #{worker[:exception]}!"
         | 
| 955 1010 | 
             
                    end
         | 
| 956 1011 | 
             
                  end
         | 
| 957 1012 |  | 
| @@ -1000,7 +1055,7 @@ module Bio | |
| 1000 1055 |  | 
| 1001 1056 | 
             
                def handle_logging_options(opts)
         | 
| 1002 1057 | 
             
                  opts.on("--logger filename", String,
         | 
| 1003 | 
            -
                          "Log to file (default  | 
| 1058 | 
            +
                          "Log to file (default STDERR)") do |name|
         | 
| 1004 1059 | 
             
                    Bio::Log::CLI.logger(name)
         | 
| 1005 1060 | 
             
                  end
         | 
| 1006 1061 | 
             
                  opts.on("--trace options", String,
         |