bio-maf 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.document +5 -0
 - data/.simplecov +1 -0
 - data/.travis.yml +16 -0
 - data/.yardopts +3 -0
 - data/DEVELOPMENT.md +40 -0
 - data/Gemfile +23 -0
 - data/LICENSE.txt +20 -0
 - data/README.md +209 -0
 - data/Rakefile +76 -0
 - data/VERSION +1 -0
 - data/benchmarks/dispatch_bench +53 -0
 - data/benchmarks/iter_bench +44 -0
 - data/benchmarks/read_bench +40 -0
 - data/benchmarks/sort_bench +33 -0
 - data/benchmarks/split_bench +33 -0
 - data/bin/maf_count +82 -0
 - data/bin/maf_dump_blocks +27 -0
 - data/bin/maf_extract_ranges_count +44 -0
 - data/bin/maf_index +88 -0
 - data/bin/maf_parse_bench +94 -0
 - data/bin/maf_to_fasta +68 -0
 - data/bin/maf_write +84 -0
 - data/bin/random_ranges +35 -0
 - data/features/maf-indexing.feature +31 -0
 - data/features/maf-output.feature +29 -0
 - data/features/maf-parsing.feature +44 -0
 - data/features/maf-querying.feature +75 -0
 - data/features/maf-to-fasta.feature +50 -0
 - data/features/step_definitions/convert_steps.rb +45 -0
 - data/features/step_definitions/index_steps.rb +20 -0
 - data/features/step_definitions/output_steps.rb +27 -0
 - data/features/step_definitions/parse_steps.rb +63 -0
 - data/features/step_definitions/query_steps.rb +31 -0
 - data/features/step_definitions/ucsc_bin_steps.rb +14 -0
 - data/features/support/env.rb +16 -0
 - data/features/ucsc-bins.feature +24 -0
 - data/lib/bio/maf/index.rb +620 -0
 - data/lib/bio/maf/parser.rb +888 -0
 - data/lib/bio/maf/struct.rb +63 -0
 - data/lib/bio/maf/writer.rb +63 -0
 - data/lib/bio/maf.rb +4 -0
 - data/lib/bio/ucsc/genomic-interval-bin.rb +13 -0
 - data/lib/bio/ucsc/ucsc_bin.rb +117 -0
 - data/lib/bio/ucsc.rb +2 -0
 - data/lib/bio-maf/maf.rb +3 -0
 - data/lib/bio-maf.rb +12 -0
 - data/man/.gitignore +1 -0
 - data/man/maf_index.1 +105 -0
 - data/man/maf_index.1.markdown +97 -0
 - data/man/maf_index.1.ronn +83 -0
 - data/man/maf_to_fasta.1 +53 -0
 - data/man/maf_to_fasta.1.ronn +51 -0
 - data/spec/bio/maf/index_spec.rb +363 -0
 - data/spec/bio/maf/parser_spec.rb +354 -0
 - data/spec/bio/maf/struct_spec.rb +75 -0
 - data/spec/spec_helper.rb +14 -0
 - data/test/data/big-block.maf +15999 -0
 - data/test/data/chr22_ieq.maf +11 -0
 - data/test/data/chrY-1block.maf +6 -0
 - data/test/data/empty +0 -0
 - data/test/data/empty.db +0 -0
 - data/test/data/mm8_chr7_tiny.kct +0 -0
 - data/test/data/mm8_chr7_tiny.maf +76 -0
 - data/test/data/mm8_mod_a.maf +7 -0
 - data/test/data/mm8_single.maf +13 -0
 - data/test/data/mm8_subset_a.maf +23 -0
 - data/test/data/t1-bad1.maf +15 -0
 - data/test/data/t1.fasta +12 -0
 - data/test/data/t1.maf +15 -0
 - data/test/data/t1a.maf +17 -0
 - data/test/helper.rb +18 -0
 - data/test/test_bio-maf.rb +7 -0
 - data/travis-ci/install_kc +13 -0
 - data/travis-ci/install_kc_java +13 -0
 - data/travis-ci/report_errors +4 -0
 - metadata +181 -0
 
    
        data/bin/maf_count
    ADDED
    
    | 
         @@ -0,0 +1,82 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'bio-maf'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'bigbio'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'ostruct'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            options = OpenStruct.new
         
     | 
| 
      
 9 
     | 
    
         
            +
            options.parser = Bio::MAF::Parser
         
     | 
| 
      
 10 
     | 
    
         
            +
            options.reader = Bio::MAF::ChunkReader
         
     | 
| 
      
 11 
     | 
    
         
            +
             
     | 
| 
      
 12 
     | 
    
         
            +
            PRINTERS = {
         
     | 
| 
      
 13 
     | 
    
         
            +
              'flat' => :FlatPrinter,
         
     | 
| 
      
 14 
     | 
    
         
            +
              'stack' => :CallStackPrinter
         
     | 
| 
      
 15 
     | 
    
         
            +
            }
         
     | 
| 
      
 16 
     | 
    
         
            +
             
     | 
| 
      
 17 
     | 
    
         
            +
            OptionParser.new do |opts|
         
     | 
| 
      
 18 
     | 
    
         
            +
              opts.banner = "Usage: maf_count [options] <maf>"
         
     | 
| 
      
 19 
     | 
    
         
            +
              opts.separator ""
         
     | 
| 
      
 20 
     | 
    
         
            +
              opts.separator "Options:"
         
     | 
| 
      
 21 
     | 
    
         
            +
              opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
         
     | 
| 
      
 22 
     | 
    
         
            +
                options.prof = prof
         
     | 
| 
      
 23 
     | 
    
         
            +
              end
         
     | 
| 
      
 24 
     | 
    
         
            +
              opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
         
     | 
| 
      
 25 
     | 
    
         
            +
                if pspec =~ /(\w+):(.+)/
         
     | 
| 
      
 26 
     | 
    
         
            +
                  require 'ruby-prof'
         
     | 
| 
      
 27 
     | 
    
         
            +
                  options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
         
     | 
| 
      
 28 
     | 
    
         
            +
                  options.ruby_prof_path = $2
         
     | 
| 
      
 29 
     | 
    
         
            +
                else
         
     | 
| 
      
 30 
     | 
    
         
            +
                  # ruby-prof is only required in the branch above; load it here as
                  # well so that RubyProf is defined when a bare path is given.
                  require 'ruby-prof'
                  options.ruby_prof_printer = RubyProf::FlatPrinter
         
     | 
| 
      
 31 
     | 
    
         
            +
                  options.ruby_prof_path = pspec
         
     | 
| 
      
 32 
     | 
    
         
            +
                end
         
     | 
| 
      
 33 
     | 
    
         
            +
              end
         
     | 
| 
      
 34 
     | 
    
         
            +
              opts.on("--profile-gc", "Profile GC") do |prof|
         
     | 
| 
      
 35 
     | 
    
         
            +
                options.profile_gc = true
         
     | 
| 
      
 36 
     | 
    
         
            +
              end
         
     | 
| 
      
 37 
     | 
    
         
            +
              opts.on("--parser PARSER", "parser") do |name|
         
     | 
| 
      
 38 
     | 
    
         
            +
                options.parser = Bio::MAF.const_get(name)
         
     | 
| 
      
 39 
     | 
    
         
            +
              end
         
     | 
| 
      
 40 
     | 
    
         
            +
              opts.on("-t", "--threaded") do
         
     | 
| 
      
 41 
     | 
    
         
            +
                options.reader = Bio::MAF::ThreadedChunkReader
         
     | 
| 
      
 42 
     | 
    
         
            +
              end
         
     | 
| 
      
 43 
     | 
    
         
            +
            end.parse!(ARGV)
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     | 
| 
      
 45 
     | 
    
         
            +
            src_path = ARGV.shift
         
     | 
| 
      
 46 
     | 
    
         
            +
             
     | 
| 
      
 47 
     | 
    
         
            +
            if options.prof
         
     | 
| 
      
 48 
     | 
    
         
            +
              require 'perftools'
         
     | 
| 
      
 49 
     | 
    
         
            +
              PerfTools::CpuProfiler.start(options.prof)
         
     | 
| 
      
 50 
     | 
    
         
            +
            elsif options.ruby_prof_path
         
     | 
| 
      
 51 
     | 
    
         
            +
              require 'ruby-prof'
         
     | 
| 
      
 52 
     | 
    
         
            +
              RubyProf.start
         
     | 
| 
      
 53 
     | 
    
         
            +
            end
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
| 
      
 55 
     | 
    
         
            +
            if options.profile_gc
         
     | 
| 
      
 56 
     | 
    
         
            +
              GC::Profiler.enable
         
     | 
| 
      
 57 
     | 
    
         
            +
            end
         
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
      
 59 
     | 
    
         
            +
            parser = options.parser.new(src_path,
         
     | 
| 
      
 60 
     | 
    
         
            +
                                        :chunk_reader => options.reader,
         
     | 
| 
      
 61 
     | 
    
         
            +
                                        :parse_extended => false)
         
     | 
| 
      
 62 
     | 
    
         
            +
             
     | 
| 
      
 63 
     | 
    
         
            +
            n = 0
         
     | 
| 
      
 64 
     | 
    
         
            +
            parser.each_block do |block|
         
     | 
| 
      
 65 
     | 
    
         
            +
              n += 1
         
     | 
| 
      
 66 
     | 
    
         
            +
            end
         
     | 
| 
      
 67 
     | 
    
         
            +
            puts "Parsed #{n} MAF alignment blocks."
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            if options.profile_gc
         
     | 
| 
      
 70 
     | 
    
         
            +
              $stderr.puts GC::Profiler.result
         
     | 
| 
      
 71 
     | 
    
         
            +
              GC::Profiler.disable
         
     | 
| 
      
 72 
     | 
    
         
            +
            end
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
            if options.prof
         
     | 
| 
      
 75 
     | 
    
         
            +
              PerfTools::CpuProfiler.stop
         
     | 
| 
      
 76 
     | 
    
         
            +
            elsif options.ruby_prof_path
         
     | 
| 
      
 77 
     | 
    
         
            +
              res = RubyProf.stop
         
     | 
| 
      
 78 
     | 
    
         
            +
              printer = options.ruby_prof_printer.new(res)
         
     | 
| 
      
 79 
     | 
    
         
            +
              File.open(options.ruby_prof_path, 'w') do |f|
         
     | 
| 
      
 80 
     | 
    
         
            +
                printer.print(f)
         
     | 
| 
      
 81 
     | 
    
         
            +
              end
         
     | 
| 
      
 82 
     | 
    
         
            +
            end
         
     | 
    
        data/bin/maf_dump_blocks
    ADDED
    
    | 
         @@ -0,0 +1,27 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'bio-maf'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'bigbio'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'ostruct'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            options = OpenStruct.new
         
     | 
| 
      
 9 
     | 
    
         
            +
            options.parser = Bio::MAF::Parser
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
            OptionParser.new do |opts|
         
     | 
| 
      
 12 
     | 
    
         
            +
              opts.banner = "Usage: maf_dump_blocks [options] <maf>"
         
     | 
| 
      
 13 
     | 
    
         
            +
              opts.separator ""
         
     | 
| 
      
 14 
     | 
    
         
            +
              opts.separator "Options:"
         
     | 
| 
      
 15 
     | 
    
         
            +
              opts.on("--parser PARSER", "parser") do |name|
         
     | 
| 
      
 16 
     | 
    
         
            +
                options.parser = Bio::MAF.const_get(name)
         
     | 
| 
      
 17 
     | 
    
         
            +
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
            end.parse!(ARGV)
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
            src_path = ARGV.shift
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
            parser = options.parser.new(src_path)
         
     | 
| 
      
 23 
     | 
    
         
            +
             
     | 
| 
      
 24 
     | 
    
         
            +
            parser.each_block do |block|
         
     | 
| 
      
 25 
     | 
    
         
            +
              $stdout.printf("%12d\t%7d\n", block.offset, block.size)
         
     | 
| 
      
 26 
     | 
    
         
            +
            end
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
    
        data/bin/maf_extract_ranges_count
    ADDED
    
    | 
         @@ -0,0 +1,44 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'ostruct'
         
     | 
| 
      
 5 
     | 
    
         
            +
             
     | 
| 
      
 6 
     | 
    
         
            +
            require 'bio-maf'
         
     | 
| 
      
 7 
     | 
    
         
            +
            require 'bio-genomic-interval'
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
            options = OpenStruct.new
         
     | 
| 
      
 10 
     | 
    
         
            +
            options.p = { :threads => 1 }
         
     | 
| 
      
 11 
     | 
    
         
            +
            options.passes = 1
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            OptionParser.new do |opts|
         
     | 
| 
      
 14 
     | 
    
         
            +
              opts.banner = "Usage: maf_extract_ranges_count [options] <maf> <index>"
         
     | 
| 
      
 15 
     | 
    
         
            +
              opts.separator ""
         
     | 
| 
      
 16 
     | 
    
         
            +
              opts.separator "Options:"
         
     | 
| 
      
 17 
     | 
    
         
            +
              opts.on("-t", "--threads N", "Parser threads") do |n|
         
     | 
| 
      
 18 
     | 
    
         
            +
                options.p[:threads] = n.to_i
         
     | 
| 
      
 19 
     | 
    
         
            +
              end
         
     | 
| 
      
 20 
     | 
    
         
            +
              opts.on("-p", "--passes N", "Number of passes") do |n|
         
     | 
| 
      
 21 
     | 
    
         
            +
                options.passes = n.to_i
         
     | 
| 
      
 22 
     | 
    
         
            +
              end
         
     | 
| 
      
 23 
     | 
    
         
            +
            end.parse!(ARGV)
         
     | 
| 
      
 24 
     | 
    
         
            +
             
     | 
| 
      
 25 
     | 
    
         
            +
            maf_p = ARGV.shift
         
     | 
| 
      
 26 
     | 
    
         
            +
            index_p = ARGV.shift
         
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
            parser = Bio::MAF::Parser.new(maf_p, options.p)
         
     | 
| 
      
 29 
     | 
    
         
            +
            index = Bio::MAF::KyotoIndex.open(index_p)
         
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
      
 31 
     | 
    
         
            +
            # Convert one line of text into a genomic interval.
            #
            # The line is split on whitespace into at most four fields; the first
            # three are taken as the source name, start and end. Start and end are
            # converted with String#to_i, and any remaining text is ignored.
            # NOTE(review): presumably BED-style input on stdin -- confirm.
            #
            # @param line [String] one input line
            # @return the result of Bio::GenomicInterval.zero_based(src, start, end)
            def parse_interval(line)
         
     | 
| 
      
 32 
     | 
    
         
            +
              src, r_start_s, r_end_s, _ = line.split(nil, 4)
         
     | 
| 
      
 33 
     | 
    
         
            +
              r_start = r_start_s.to_i
         
     | 
| 
      
 34 
     | 
    
         
            +
              r_end = r_end_s.to_i
         
     | 
| 
      
 35 
     | 
    
         
            +
              return Bio::GenomicInterval.zero_based(src, r_start, r_end)
         
     | 
| 
      
 36 
     | 
    
         
            +
            end
         
     | 
| 
      
 37 
     | 
    
         
            +
             
     | 
| 
      
 38 
     | 
    
         
            +
            intervals = []
         
     | 
| 
      
 39 
     | 
    
         
            +
            $stdin.each_line { |line| intervals << parse_interval(line) }
         
     | 
| 
      
 40 
     | 
    
         
            +
             
     | 
| 
      
 41 
     | 
    
         
            +
            options.passes.times do
         
     | 
| 
      
 42 
     | 
    
         
            +
              blocks = index.find(intervals, parser)
         
     | 
| 
      
 43 
     | 
    
         
            +
              puts "TOTAL: #{blocks.count} blocks parsed."
         
     | 
| 
      
 44 
     | 
    
         
            +
            end
         
     | 
    
        data/bin/maf_index
    ADDED
    
    | 
         @@ -0,0 +1,88 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'benchmark'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'bio-maf'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'ostruct'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            PRINTERS = {
         
     | 
| 
      
 9 
     | 
    
         
            +
              'flat' => :FlatPrinter,
         
     | 
| 
      
 10 
     | 
    
         
            +
              'stack' => :CallStackPrinter,
         
     | 
| 
      
 11 
     | 
    
         
            +
              'graph' => :GraphHtmlPrinter
         
     | 
| 
      
 12 
     | 
    
         
            +
            }
         
     | 
| 
      
 13 
     | 
    
         
            +
             
     | 
| 
      
 14 
     | 
    
         
            +
            $options = OpenStruct.new
         
     | 
| 
      
 15 
     | 
    
         
            +
            $options.mode = :build
         
     | 
| 
      
 16 
     | 
    
         
            +
            $options.reader = Bio::MAF::ChunkReader
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            # Build an index for a MAF file and close it afterwards.
            #
            # Creates a Bio::MAF::Parser for +maf+ using the chunk reader class held
            # in $options.reader (with :parse_extended => false), hands it to
            # Bio::MAF::KyotoIndex.build together with +index+, then closes the
            # returned index object.
            #
            # @param maf path of the MAF file handed to the parser
            # @param index argument passed through to Bio::MAF::KyotoIndex.build
            def build_index(maf, index)
         
     | 
| 
      
 19 
     | 
    
         
            +
              parser = Bio::MAF::Parser.new(maf,
         
     | 
| 
      
 20 
     | 
    
         
            +
                                            :chunk_reader => $options.reader,
         
     | 
| 
      
 21 
     | 
    
         
            +
                                            :parse_extended => false)
         
     | 
| 
      
 22 
     | 
    
         
            +
              idx = Bio::MAF::KyotoIndex.build(parser, index)
         
     | 
| 
      
 23 
     | 
    
         
            +
              idx.close
         
     | 
| 
      
 24 
     | 
    
         
            +
            end
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
      
 26 
     | 
    
         
            +
            op = OptionParser.new do |opts|
         
     | 
| 
      
 27 
     | 
    
         
            +
              opts.banner = "Usage: maf_index [options] <maf> <index>"
         
     | 
| 
      
 28 
     | 
    
         
            +
              #opts.separator ""
         
     | 
| 
      
 29 
     | 
    
         
            +
              #opts.separator "Options:"
         
     | 
| 
      
 30 
     | 
    
         
            +
              opts.on("--time", "print elapsed time") do
         
     | 
| 
      
 31 
     | 
    
         
            +
                $options.bench = true
         
     | 
| 
      
 32 
     | 
    
         
            +
              end
         
     | 
| 
      
 33 
     | 
    
         
            +
              opts.on("-d", "--dump") do
         
     | 
| 
      
 34 
     | 
    
         
            +
                $options.mode = :dump
         
     | 
| 
      
 35 
     | 
    
         
            +
              end
         
     | 
| 
      
 36 
     | 
    
         
            +
              opts.on("-t", "--threaded") do
         
     | 
| 
      
 37 
     | 
    
         
            +
                $options.reader = Bio::MAF::ThreadedChunkReader
         
     | 
| 
      
 38 
     | 
    
         
            +
              end
         
     | 
| 
      
 39 
     | 
    
         
            +
              opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
         
     | 
| 
      
 40 
     | 
    
         
            +
                require 'ruby-prof'
         
     | 
| 
      
 41 
     | 
    
         
            +
                if pspec =~ /(\w+):(.+)/
         
     | 
| 
      
 42 
     | 
    
         
            +
                  $options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
         
     | 
| 
      
 43 
     | 
    
         
            +
                  $options.ruby_prof_path = $2
         
     | 
| 
      
 44 
     | 
    
         
            +
                else
         
     | 
| 
      
 45 
     | 
    
         
            +
                  # No "printer:" prefix was given, so fall back to the flat printer.
                  # The module is RubyProf (as used in the branch above), not Ruby_Prof.
                  $options.ruby_prof_printer = RubyProf::FlatPrinter
         
     | 
| 
      
 46 
     | 
    
         
            +
                  $options.ruby_prof_path = pspec
         
     | 
| 
      
 47 
     | 
    
         
            +
                end
         
     | 
| 
      
 48 
     | 
    
         
            +
              end
         
     | 
| 
      
 49 
     | 
    
         
            +
            end
         
     | 
| 
      
 50 
     | 
    
         
            +
             
     | 
| 
      
 51 
     | 
    
         
            +
            op.parse!(ARGV)
         
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
      
 53 
     | 
    
         
            +
            maf_p = ARGV.shift if $options.mode == :build
         
     | 
| 
      
 54 
     | 
    
         
            +
            index_p = ARGV.shift
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
            unless (maf_p || $options.mode == :dump) && index_p
         
     | 
| 
      
 57 
     | 
    
         
            +
              $stderr.puts op
         
     | 
| 
      
 58 
     | 
    
         
            +
              exit 1
         
     | 
| 
      
 59 
     | 
    
         
            +
            end
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
            if $options.ruby_prof_path
         
     | 
| 
      
 62 
     | 
    
         
            +
              RubyProf.start
         
     | 
| 
      
 63 
     | 
    
         
            +
            end
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            case $options.mode
         
     | 
| 
      
 66 
     | 
    
         
            +
            when :build
         
     | 
| 
      
 67 
     | 
    
         
            +
              if ! $options.bench
         
     | 
| 
      
 68 
     | 
    
         
            +
                build_index(maf_p, index_p)
         
     | 
| 
      
 69 
     | 
    
         
            +
              else
         
     | 
| 
      
 70 
     | 
    
         
            +
                bm_res = Benchmark.measure do
         
     | 
| 
      
 71 
     | 
    
         
            +
                  build_index(maf_p, index_p)
         
     | 
| 
      
 72 
     | 
    
         
            +
                end
         
     | 
| 
      
 73 
     | 
    
         
            +
                puts bm_res
         
     | 
| 
      
 74 
     | 
    
         
            +
              end
         
     | 
| 
      
 75 
     | 
    
         
            +
            when :dump
         
     | 
| 
      
 76 
     | 
    
         
            +
              idx = Bio::MAF::KyotoIndex.open(index_p)
         
     | 
| 
      
 77 
     | 
    
         
            +
              idx.dump
         
     | 
| 
      
 78 
     | 
    
         
            +
            else
         
     | 
| 
      
 79 
     | 
    
         
            +
              raise "Unsupported mode: #{$options.mode}"
         
     | 
| 
      
 80 
     | 
    
         
            +
            end
         
     | 
| 
      
 81 
     | 
    
         
            +
             
     | 
| 
      
 82 
     | 
    
         
            +
            if $options.ruby_prof_path
         
     | 
| 
      
 83 
     | 
    
         
            +
              res = RubyProf.stop
         
     | 
| 
      
 84 
     | 
    
         
            +
              printer = $options.ruby_prof_printer.new(res)
         
     | 
| 
      
 85 
     | 
    
         
            +
              File.open($options.ruby_prof_path, 'w') do |f|
         
     | 
| 
      
 86 
     | 
    
         
            +
                printer.print(f)
         
     | 
| 
      
 87 
     | 
    
         
            +
              end
         
     | 
| 
      
 88 
     | 
    
         
            +
            end
         
     | 
    
        data/bin/maf_parse_bench
    ADDED
    
    | 
         @@ -0,0 +1,94 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            require 'benchmark'
         
     | 
| 
      
 4 
     | 
    
         
            +
            require 'bio-maf'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'optparse'
         
     | 
| 
      
 6 
     | 
    
         
            +
            require 'ostruct'
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
            options = OpenStruct.new
         
     | 
| 
      
 9 
     | 
    
         
            +
            options.parser = Bio::MAF::Parser
         
     | 
| 
      
 10 
     | 
    
         
            +
            options.runs = 100_000
         
     | 
| 
      
 11 
     | 
    
         
            +
            options.warmup = false
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
            PRINTERS = {
         
     | 
| 
      
 14 
     | 
    
         
            +
              'flat' => :FlatPrinter,
         
     | 
| 
      
 15 
     | 
    
         
            +
              'stack' => :CallStackPrinter
         
     | 
| 
      
 16 
     | 
    
         
            +
            }
         
     | 
| 
      
 17 
     | 
    
         
            +
             
     | 
| 
      
 18 
     | 
    
         
            +
            OptionParser.new do |opts|
         
     | 
| 
      
 19 
     | 
    
         
            +
              opts.banner = "Usage: maf_parse_bench [options] <maf>"
         
     | 
| 
      
 20 
     | 
    
         
            +
              opts.separator ""
         
     | 
| 
      
 21 
     | 
    
         
            +
              opts.separator "Options:"
         
     | 
| 
      
 22 
     | 
    
         
            +
              opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
         
     | 
| 
      
 23 
     | 
    
         
            +
                options.prof = prof
         
     | 
| 
      
 24 
     | 
    
         
            +
              end
         
     | 
| 
      
 25 
     | 
    
         
            +
              opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
         
     | 
| 
      
 26 
     | 
    
         
            +
                if pspec =~ /(\w+):(.+)/
         
     | 
| 
      
 27 
     | 
    
         
            +
                  require 'ruby-prof'
         
     | 
| 
      
 28 
     | 
    
         
            +
                  options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
         
     | 
| 
      
 29 
     | 
    
         
            +
                  options.ruby_prof_path = $2
         
     | 
| 
      
 30 
     | 
    
         
            +
                else
         
     | 
| 
      
 31 
     | 
    
         
            +
                  # Store the printer class itself, as the branch above does, rather
                  # than a bare symbol; ruby-prof must be loaded before RubyProf is
                  # referenced.
                  require 'ruby-prof'
                  options.ruby_prof_printer = RubyProf::FlatPrinter
         
     | 
| 
      
 32 
     | 
    
         
            +
                  options.ruby_prof_path = pspec
         
     | 
| 
      
 33 
     | 
    
         
            +
                end
         
     | 
| 
      
 34 
     | 
    
         
            +
              end
         
     | 
| 
      
 35 
     | 
    
         
            +
              opts.on("--profile-gc", "Profile GC") do |prof|
         
     | 
| 
      
 36 
     | 
    
         
            +
                options.profile_gc = true
         
     | 
| 
      
 37 
     | 
    
         
            +
              end
         
     | 
| 
      
 38 
     | 
    
         
            +
              opts.on("--parser PARSER", "parser") do |name|
         
     | 
| 
      
 39 
     | 
    
         
            +
                options.parser = Bio::MAF.const_get(name)
         
     | 
| 
      
 40 
     | 
    
         
            +
              end
         
     | 
| 
      
 41 
     | 
    
         
            +
              opts.on("-w", "--warmup", "perform warmup run") do 
         
     | 
| 
      
 42 
     | 
    
         
            +
                options.warmup = true
         
     | 
| 
      
 43 
     | 
    
         
            +
              end
         
     | 
| 
      
 44 
     | 
    
         
            +
            end.parse!(ARGV)
         
     | 
| 
      
 45 
     | 
    
         
            +
             
     | 
| 
      
 46 
     | 
    
         
            +
            src_path = ARGV.shift
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
            if options.prof
         
     | 
| 
      
 49 
     | 
    
         
            +
              require 'perftools'
         
     | 
| 
      
 50 
     | 
    
         
            +
              PerfTools::CpuProfiler.start(options.prof)
         
     | 
| 
      
 51 
     | 
    
         
            +
            elsif options.ruby_prof_path
         
     | 
| 
      
 52 
     | 
    
         
            +
              require 'ruby-prof'
         
     | 
| 
      
 53 
     | 
    
         
            +
              RubyProf.start
         
     | 
| 
      
 54 
     | 
    
         
            +
            end
         
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
      
 56 
     | 
    
         
            +
            if options.profile_gc
         
     | 
| 
      
 57 
     | 
    
         
            +
              GC::Profiler.enable
         
     | 
| 
      
 58 
     | 
    
         
            +
            end
         
     | 
| 
      
 59 
     | 
    
         
            +
             
     | 
| 
      
 60 
     | 
    
         
            +
            parser = options.parser.new(src_path)
         
     | 
| 
      
 61 
     | 
    
         
            +
            parser.parse_block
         
     | 
| 
      
 62 
     | 
    
         
            +
            parser.parse_block
         
     | 
| 
      
 63 
     | 
    
         
            +
            pos = parser.s.pos
         
     | 
| 
      
 64 
     | 
    
         
            +
             
     | 
| 
      
 65 
     | 
    
         
            +
            if options.warmup
         
     | 
| 
      
 66 
     | 
    
         
            +
              options.runs.times do
         
     | 
| 
      
 67 
     | 
    
         
            +
                parser.parse_block
         
     | 
| 
      
 68 
     | 
    
         
            +
                parser.s.pos = pos
         
     | 
| 
      
 69 
     | 
    
         
            +
              end
         
     | 
| 
      
 70 
     | 
    
         
            +
            end
         
     | 
| 
      
 71 
     | 
    
         
            +
             
     | 
| 
      
 72 
     | 
    
         
            +
            bm_res = Benchmark.measure do
         
     | 
| 
      
 73 
     | 
    
         
            +
              options.runs.times do
         
     | 
| 
      
 74 
     | 
    
         
            +
                parser.parse_block
         
     | 
| 
      
 75 
     | 
    
         
            +
                parser.s.pos = pos
         
     | 
| 
      
 76 
     | 
    
         
            +
              end
         
     | 
| 
      
 77 
     | 
    
         
            +
            end
         
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
      
 79 
     | 
    
         
            +
            if options.profile_gc
         
     | 
| 
      
 80 
     | 
    
         
            +
              $stderr.puts GC::Profiler.result
         
     | 
| 
      
 81 
     | 
    
         
            +
              GC::Profiler.disable
         
     | 
| 
      
 82 
     | 
    
         
            +
            end
         
     | 
| 
      
 83 
     | 
    
         
            +
             
     | 
| 
      
 84 
     | 
    
         
            +
            if options.prof
         
     | 
| 
      
 85 
     | 
    
         
            +
              PerfTools::CpuProfiler.stop
         
     | 
| 
      
 86 
     | 
    
         
            +
            elsif options.ruby_prof_path
         
     | 
| 
      
 87 
     | 
    
         
            +
              res = RubyProf.stop
         
     | 
| 
      
 88 
     | 
    
         
            +
              printer = options.ruby_prof_printer.new(res)
         
     | 
| 
      
 89 
     | 
    
         
            +
              File.open(options.ruby_prof_path, 'w') do |f|
         
     | 
| 
      
 90 
     | 
    
         
            +
                printer.print(f)
         
     | 
| 
      
 91 
     | 
    
         
            +
              end
         
     | 
| 
      
 92 
     | 
    
         
            +
            end
         
     | 
| 
      
 93 
     | 
    
         
            +
             
     | 
| 
      
 94 
     | 
    
         
            +
            puts bm_res / options.runs
         
     | 
    
        data/bin/maf_to_fasta
    ADDED
    
    | 
         @@ -0,0 +1,68 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
            #
            # maf_to_fasta: read alignment blocks from a MAF file and write each raw
            # sequence of each block to a FASTA file.
            #
            # Usage: maf_to_fasta [options] <maf> <fasta>
            #
            # Optional profiling can be enabled with PerfTools (-p), ruby-prof
            # (--ruby-prof) or the built-in GC profiler (--profile-gc).

            require 'bio-maf'
            require 'bigbio'
            require 'optparse'
            require 'ostruct'

            options = OpenStruct.new
            options.parser = Bio::MAF::Parser

            # Keep a handle on the OptionParser so the usage text can be shown when
            # the positional arguments are missing.
            op = OptionParser.new do |opts|
              opts.banner = "Usage: maf_to_fasta [options] <maf> <fasta>"
              opts.separator ""
              opts.separator "Options:"
              opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
                options.prof = prof
              end
              opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |path|
                options.ruby_prof = path
              end
              opts.on("--profile-gc", "Profile GC") do
                options.profile_gc = true
              end
              opts.on("--parser PARSER", "parser") do |name|
                # PARSER is resolved as a constant name inside Bio::MAF.
                options.parser = Bio::MAF.const_get(name)
              end
            end
            op.parse!(ARGV)

            src_path = ARGV.shift
            dst_path = ARGV.shift

            # Both positional arguments are required; without them the parser and
            # writer would be constructed with nil.
            abort(op.to_s) unless src_path && dst_path

            # Start at most one CPU profiler; PerfTools wins if both were requested.
            if options.prof
              require 'perftools'
              PerfTools::CpuProfiler.start(options.prof)
            elsif options.ruby_prof
              require 'ruby-prof'
              RubyProf.start
            end

            GC::Profiler.enable if options.profile_gc

            parser = options.parser.new(src_path)
            # FastaWriter is presumably provided by bigbio -- TODO confirm.
            writer = FastaWriter.new(dst_path)

            begin
              parser.each_block do |block|
                block.each_raw_seq do |seq|
                  seq.write_fasta(writer)
                end
              end
            ensure
              # Close the output even when parsing or writing raises.
              writer.close
            end

            if options.profile_gc
              $stderr.puts GC::Profiler.result
              GC::Profiler.disable
            end

            # Stop whichever profiler was started above and, for ruby-prof, write a
            # flat report to the path given on the command line.
            if options.prof
              PerfTools::CpuProfiler.stop
            elsif options.ruby_prof
              res = RubyProf.stop
              printer = RubyProf::FlatPrinter.new(res)
              File.open(options.ruby_prof, 'w') do |f|
                printer.print(f)
              end
            end
         
     | 
    
        data/bin/maf_write
    ADDED
    
    | 
         @@ -0,0 +1,84 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
            #
            # maf_write: parse a MAF file and write its header and all parsed blocks
            # back out to standard output through Bio::MAF::Writer.
            #
            # Usage: maf_write [options] <maf>
            #
            # Optional profiling can be enabled with PerfTools (-p), ruby-prof
            # (--ruby-prof PATH or --ruby-prof PRINTER:PATH, where PRINTER is a key of
            # PRINTERS) or the built-in GC profiler (--profile-gc).

            require 'bio-maf'
            require 'optparse'
            require 'ostruct'

            options = OpenStruct.new
            options.parser = Bio::MAF::Parser
            # Options hash handed to the parser constructor.
            options.opts = {
              :chunk_reader => Bio::MAF::ChunkReader,
              :parse_extended => false
            }

            # Short printer names accepted in --ruby-prof PRINTER:PATH, mapped to the
            # constant names looked up in the RubyProf module.
            PRINTERS = {
              'flat' => :FlatPrinter,
              'stack' => :CallStackPrinter
            }.freeze

            # Keep a handle on the OptionParser so the usage text can be shown when
            # the positional argument is missing.
            op = OptionParser.new do |opts|
              opts.banner = "Usage: maf_write [options] <maf>"
              opts.separator ""
              opts.separator "Options:"
              opts.on("-p", "--profile PROF", "Profile with PerfTools") do |prof|
                options.prof = prof
              end
              opts.on("--ruby-prof PATH", "Profile with ruby-prof") do |pspec|
                # Both branches below reference the RubyProf constant, so the
                # library has to be loaded before either of them runs.
                require 'ruby-prof'
                # Anchored at the start so that only a leading "PRINTER:" prefix is
                # treated as a printer name, not a colon somewhere inside a path.
                if pspec =~ /\A(\w+):(.+)/
                  options.ruby_prof_printer = RubyProf.const_get(PRINTERS.fetch($1))
                  options.ruby_prof_path = $2
                else
                  options.ruby_prof_printer = RubyProf::FlatPrinter
                  options.ruby_prof_path = pspec
                end
              end
              opts.on("--profile-gc", "Profile GC") do
                options.profile_gc = true
              end
              opts.on("--parser PARSER", "parser") do |name|
                # PARSER is resolved as a constant name inside Bio::MAF.
                options.parser = Bio::MAF.const_get(name)
              end
              opts.on("-t", "--threaded") do
                options.opts[:chunk_reader] = Bio::MAF::ThreadedChunkReader
                options.opts[:threads] = 1
              end
              opts.on("-e", "--extended") do
                options.opts[:parse_extended] = true
                options.opts[:parse_empty] = true
              end
            end
            op.parse!(ARGV)

            src_path = ARGV.shift

            # The MAF path is required; without it the parser would receive nil.
            abort(op.to_s) unless src_path

            # Start at most one CPU profiler; PerfTools wins if both were requested.
            if options.prof
              require 'perftools'
              PerfTools::CpuProfiler.start(options.prof)
            elsif options.ruby_prof_path
              require 'ruby-prof'
              RubyProf.start
            end

            GC::Profiler.enable if options.profile_gc

            parser = options.parser.new(src_path, options.opts)
            writer = Bio::MAF::Writer.new($stdout)
            writer.write_header(parser.header)
            writer.write_blocks(parser.parse_blocks)

            if options.profile_gc
              $stderr.puts GC::Profiler.result
              GC::Profiler.disable
            end

            # Stop whichever profiler was started above and, for ruby-prof, write the
            # report with the selected printer class to the requested path.
            if options.prof
              PerfTools::CpuProfiler.stop
            elsif options.ruby_prof_path
              res = RubyProf.stop
              printer = options.ruby_prof_printer.new(res)
              File.open(options.ruby_prof_path, 'w') do |f|
                printer.print(f)
              end
            end
         
     | 
    
        data/bin/random_ranges
    ADDED
    
    | 
         @@ -0,0 +1,35 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            #!/usr/bin/env ruby
            #
            # random_ranges: print NUM random intervals of LEN positions taken from
            # START:END, one tab-separated line per interval:
            #
            #   <sequence> <TAB> <start> <TAB> <end> <TAB> x
            #
            # START:END is divided into NUM consecutive slices of equal width and one
            # interval is drawn from each slice, so the intervals never overlap.

            require 'optparse'
            require 'ostruct'

            # Draws one random interval of +length+ positions from each of +num+
            # equal-width, consecutive slices of the span from +start+ to +stop+.
            #
            # @param start [Integer] first position of the overall span
            # @param stop [Integer] end position of the overall span
            # @param length [Integer] length of every generated interval
            # @param num [Integer] number of intervals (and slices) to generate
            # @param rng [Random] random number source; pass a seeded instance for
            #   reproducible output
            # @return [Array<Array<Integer>>] +num+ pairs of [interval_start, interval_end]
            # @raise [ArgumentError] if +num+ is not positive or a slice is not wider
            #   than +length+
            def random_ranges(start, stop, length, num, rng = Random.new)
              raise ArgumentError, "number of blocks must be positive" unless num > 0

              block_range = (stop - start) / num
              # Number of admissible start offsets inside one slice.
              block_start_range = block_range - length
              unless block_start_range > 0
                raise ArgumentError,
                      "block length #{length} does not fit into slices of width #{block_range}"
              end

              (0...num).map do |n|
                b_start = start + (block_range * n) + rng.rand(block_start_range)
                [b_start, b_start + length]
              end
            end

            options = OpenStruct.new

            op = OptionParser.new do |opts|
              opts.banner = "Usage: random_ranges [options]"
              opts.on("-r", "--range START:END", "range") do |range|
                # Reject malformed ranges instead of silently reading them as 0.
                m = /\A(-?\d+):(-?\d+)\z/.match(range)
                raise OptionParser::InvalidArgument, range unless m
                options.start = m[1].to_i
                options.end = m[2].to_i
              end
              opts.on("-l", "--length LEN", OptionParser::DecimalInteger, "block length") do |len|
                options.length = len
              end
              opts.on("-n", "--number NUM", OptionParser::DecimalInteger, "number of blocks") do |num|
                options.num = num
              end
              opts.on("-s", "--sequence SEQ", "sequence") do |seq|
                options.seq = seq
              end
            end

            if ARGV.empty?
              # Nothing can be generated without options; show the usage text.
              $stderr.puts op
            else
              begin
                op.parse!(ARGV)

                required = {
                  "--range" => options.start,
                  "--length" => options.length,
                  "--number" => options.num
                }
                missing = required.select { |_, value| value.nil? }.keys
                unless missing.empty?
                  raise OptionParser::MissingArgument, missing.join(", ")
                end

                ranges = random_ranges(options.start, options.end,
                                       options.length, options.num)
                ranges.each do |b_start, b_end|
                  puts "#{options.seq}\t#{b_start}\t#{b_end}\tx"
                end
              rescue OptionParser::ParseError, ArgumentError => e
                abort("random_ranges: #{e.message}\n#{op}")
              end
            end
         
     | 
| 
         @@ -0,0 +1,31 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            @milestone_2
         
     | 
| 
      
 2 
     | 
    
         
            +
            Feature: Indexed access to MAF files
         
     | 
| 
      
 3 
     | 
    
         
            +
              In order to extract alignment blocks from MAF files
         
     | 
| 
      
 4 
     | 
    
         
            +
              By chromosomal ranges matching a source sequence
         
     | 
| 
      
 5 
     | 
    
         
            +
              I want to have a way to build indexes on MAF files
         
     | 
| 
      
 6 
     | 
    
         
            +
              And use indexes to efficiently find alignment blocks
         
     | 
| 
      
 7 
     | 
    
         
            +
              Because linear searches of a 200 GB file are impractical
         
     | 
| 
      
 8 
     | 
    
         
            +
             
     | 
| 
      
 9 
     | 
    
         
            +
              Scenario: Index a MAF file
         
     | 
| 
      
 10 
     | 
    
         
            +
                Given a MAF source file "mm8_chr7_tiny.maf"
         
     | 
| 
      
 11 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 12 
     | 
    
         
            +
                And build an index on the reference sequence
         
     | 
| 
      
 13 
     | 
    
         
            +
                Then the index has at least 8 entries
         
     | 
| 
      
 14 
     | 
    
         
            +
                
         
     | 
| 
      
 15 
     | 
    
         
            +
              Scenario: Extract alignment blocks by chromosomal range
         
     | 
| 
      
 16 
     | 
    
         
            +
                Given a MAF source file "mm8_chr7_tiny.maf"
         
     | 
| 
      
 17 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 18 
     | 
    
         
            +
                And build an index on the reference sequence
         
     | 
| 
      
 19 
     | 
    
         
            +
                And search for blocks between positions 80082592 and 80082766 of mm8.chr7
         
     | 
| 
      
 20 
     | 
    
         
            +
                Then 2 blocks are obtained
         
     | 
| 
      
 21 
     | 
    
         
            +
                And sequence mm8.chr7 of block 0 has start 80082592
         
     | 
| 
      
 22 
     | 
    
         
            +
                And sequence mm8.chr7 of block 1 has start 80082713
         
     | 
| 
      
 23 
     | 
    
         
            +
                
         
     | 
| 
      
 24 
     | 
    
         
            +
              Scenario: Extract alignment blocks by chromosomal range from index file
         
     | 
| 
      
 25 
     | 
    
         
            +
                Given a MAF source file "mm8_chr7_tiny.maf"
         
     | 
| 
      
 26 
     | 
    
         
            +
                And a Kyoto Cabinet index file "mm8_chr7_tiny.kct"
         
     | 
| 
      
 27 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 28 
     | 
    
         
            +
                And search for blocks between positions 80082592 and 80082766 of mm8.chr7
         
     | 
| 
      
 29 
     | 
    
         
            +
                Then 2 blocks are obtained
         
     | 
| 
      
 30 
     | 
    
         
            +
                And sequence mm8.chr7 of block 0 has start 80082592
         
     | 
| 
      
 31 
     | 
    
         
            +
                And sequence mm8.chr7 of block 1 has start 80082713
         
     | 
| 
         @@ -0,0 +1,29 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            Feature: MAF output
         
     | 
| 
      
 2 
     | 
    
         
            +
              In order to output modified MAF files or subsets of them
         
     | 
| 
      
 3 
     | 
    
         
            +
              I want to be able to write out parsed MAF data
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              Scenario: Reproduce simple test data
         
     | 
| 
      
 6 
     | 
    
         
            +
                Given a MAF source file "mm8_single.maf"
         
     | 
| 
      
 7 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 8 
     | 
    
         
            +
                And open a new MAF writer
         
     | 
| 
      
 9 
     | 
    
         
            +
                And write the header from the original MAF file
         
     | 
| 
      
 10 
     | 
    
         
            +
                And write all the parsed blocks
         
     | 
| 
      
 11 
     | 
    
         
            +
                Then the output should match, except whitespace, "mm8_single.maf"
         
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
      
 13 
     | 
    
         
            +
              Scenario: Reproduce longer test data
         
     | 
| 
      
 14 
     | 
    
         
            +
                Given a MAF source file "mm8_chr7_tiny.maf"
         
     | 
| 
      
 15 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 16 
     | 
    
         
            +
                And open a new MAF writer
         
     | 
| 
      
 17 
     | 
    
         
            +
                And write the header from the original MAF file
         
     | 
| 
      
 18 
     | 
    
         
            +
                And write all the parsed blocks
         
     | 
| 
      
 19 
     | 
    
         
            +
                Then the output should match, except whitespace, "mm8_chr7_tiny.maf"
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
              Scenario: Reproduce test data with i, e, q lines
         
     | 
| 
      
 22 
     | 
    
         
            +
                Given a MAF source file "chr22_ieq.maf"
         
     | 
| 
      
 23 
     | 
    
         
            +
                When I enable the :parse_extended parser option
         
     | 
| 
      
 24 
     | 
    
         
            +
                And I enable the :parse_empty parser option
         
     | 
| 
      
 25 
     | 
    
         
            +
                And I open it with a MAF reader
         
     | 
| 
      
 26 
     | 
    
         
            +
                And open a new MAF writer
         
     | 
| 
      
 27 
     | 
    
         
            +
                And write the header from the original MAF file
         
     | 
| 
      
 28 
     | 
    
         
            +
                And write all the parsed blocks
         
     | 
| 
      
 29 
     | 
    
         
            +
                Then the output should match, except whitespace, "chr22_ieq.maf"
         
     | 
| 
         @@ -0,0 +1,44 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            Feature: Parse MAF files
         
     | 
| 
      
 2 
     | 
    
         
            +
              In order to extract information from a MAF file
         
     | 
| 
      
 3 
     | 
    
         
            +
              I want to read it and pull out information
         
     | 
| 
      
 4 
     | 
    
         
            +
             
     | 
| 
      
 5 
     | 
    
         
            +
              Scenario: Read MAF header
         
     | 
| 
      
 6 
     | 
    
         
            +
                Given MAF data:
         
     | 
| 
      
 7 
     | 
    
         
            +
                """
         
     | 
| 
      
 8 
     | 
    
         
            +
                ##maf version=1 scoring=humor.v4
         
     | 
| 
      
 9 
     | 
    
         
            +
                # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
                a score=0.128
         
     | 
| 
      
 12 
     | 
    
         
            +
                s human_hoxa 100  8 + 100257 ACA-TTACT
         
     | 
| 
      
 13 
     | 
    
         
            +
                s horse_hoxa 120  9 -  98892 ACAATTGCT
         
     | 
| 
      
 14 
     | 
    
         
            +
                s fugu_hoxa   88  7  + 90788 ACA--TGCT
         
     | 
| 
      
 15 
     | 
    
         
            +
                """
         
     | 
| 
      
 16 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 17 
     | 
    
         
            +
                Then the MAF version should be "1"
         
     | 
| 
      
 18 
     | 
    
         
            +
                And the scoring scheme should be "humor.v4"
         
     | 
| 
      
 19 
     | 
    
         
            +
                # a third header line, when present, is a continuation of the alignment parameters
         
     | 
| 
      
 20 
     | 
    
         
            +
                And the alignment parameters should be "humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet25/chr22.maf /cluster/data/hg15/bed/blastz.rn3/axtNet25/chr22.maf" 
         
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
      
 22 
     | 
    
         
            +
              Scenario: Read alignment block
         
     | 
| 
      
 23 
     | 
    
         
            +
                Given MAF data:
         
     | 
| 
      
 24 
     | 
    
         
            +
                """
         
     | 
| 
      
 25 
     | 
    
         
            +
                ##maf version=1 scoring=humor.v4
         
     | 
| 
      
 26 
     | 
    
         
            +
                # humor.v4 R=30 M=10 /cluster/data/hg15/bed/blastz.mm3/axtNet300/chr1.maf
         
     | 
| 
      
 27 
     | 
    
         
            +
                # /cluster/data/hg15/bed/blastz.rn3/axtNet300/chr1.maf
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
                a score=0.128
         
     | 
| 
      
 30 
     | 
    
         
            +
                s human_hoxa 100  8 + 100257 ACA-TTACT
         
     | 
| 
      
 31 
     | 
    
         
            +
                s horse_hoxa 120  9 -  98892 ACAATTGCT
         
     | 
| 
      
 32 
     | 
    
         
            +
                s fugu_hoxa   88  7  + 90788 ACA--TGCT
         
     | 
| 
      
 33 
     | 
    
         
            +
                """
         
     | 
| 
      
 34 
     | 
    
         
            +
                When I open it with a MAF reader
         
     | 
| 
      
 35 
     | 
    
         
            +
                Then an alignment block can be obtained
         
     | 
| 
      
 36 
     | 
    
         
            +
                And the alignment block has 3 sequences
         
     | 
| 
      
 37 
     | 
    
         
            +
                And sequence 0 has source "human_hoxa"
         
     | 
| 
      
 38 
     | 
    
         
            +
                And sequence 0 has start 100
         
     | 
| 
      
 39 
     | 
    
         
            +
                And sequence 0 has size 8
         
     | 
| 
      
 40 
     | 
    
         
            +
                And sequence 0 has strand :+
         
     | 
| 
      
 41 
     | 
    
         
            +
                And sequence 0 has source size 100257
         
     | 
| 
      
 42 
     | 
    
         
            +
                And sequence 0 has text "ACA-TTACT"
         
     | 
| 
      
 43 
     | 
    
         
            +
                And sequence 1 has strand :-
         
     | 
| 
      
 44 
     | 
    
         
            +
             
     |