minimap2 0.0.2 → 0.2.22.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +107 -65
- data/lib/minimap2.rb +35 -19
- data/lib/minimap2/aligner.rb +32 -13
- data/lib/minimap2/alignment.rb +6 -2
- data/lib/minimap2/ffi/constants.rb +62 -43
- data/lib/minimap2/version.rb +2 -1
- data/vendor/libminimap2.so +0 -0
- metadata +18 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7ceb411a88448c6ed13e6d842450264e91569260c9b19d77e699a93736768522
         | 
| 4 | 
            +
              data.tar.gz: 76c1f3466375b73c54db6cd2574ffe19a817b11994d50e9e8473a209566b14f5
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 05a313f05984e2afab772da3971249327a9461a92d8b1fbcf53329ea5cd9d421e4ab8a137bd16213b86d6912bf1d43fd7f22c1bb5408a36004a64296fab294d9
         | 
| 7 | 
            +
              data.tar.gz: 7739312455aba2b14192eef634623f351fc4cea721d142b815ce41d651bc3de84e0fe413de6d24e8551ed90a2d71bcbf82a3df5b87685cb1474e6a134ed9fefe
         | 
    
        data/README.md
    CHANGED
    
    | @@ -1,20 +1,23 @@ | |
| 1 | 
            -
            #  | 
| 1 | 
            +
            # ruby-minimap2
         | 
| 2 2 |  | 
| 3 3 | 
             
            [](https://rubygems.org/gems/minimap2)
         | 
| 4 4 | 
             
            [](https://github.com/kojix2/ruby-minimap2/actions)
         | 
| 5 5 | 
             
            [](LICENSE.txt)
         | 
| 6 6 | 
             
            [](https://rubydoc.info/gems/minimap2)
         | 
| 7 | 
            +
            [](https://zenodo.org/badge/latestdoi/325711305)
         | 
| 8 | 
            +
             | 
| 9 | 
            +
             | 
| 7 10 |  | 
| 8 11 | 
             
            :dna: [minimap2](https://github.com/lh3/minimap2) - the long-read mapper - for [Ruby](https://github.com/ruby/ruby)
         | 
| 9 12 |  | 
| 10 13 | 
             
            ## Installation
         | 
| 11 14 |  | 
| 12 | 
            -
             | 
| 15 | 
            +
            Open your terminal and type the following commands in order. You need to build minimap2 on your own because you need to create a shared library that contains cmappy functions.
         | 
| 13 16 |  | 
| 14 17 | 
             
            Build
         | 
| 15 18 |  | 
| 16 19 | 
             
            ```sh
         | 
| 17 | 
            -
            git clone -- | 
| 20 | 
            +
            git clone --recursive https://github.com/kojix2/ruby-minimap2
         | 
| 18 21 | 
             
            cd ruby-minimap2
         | 
| 19 22 | 
             
            bundle install
         | 
| 20 23 | 
             
            bundle exec rake minimap2:build
         | 
| @@ -26,108 +29,136 @@ Install | |
| 26 29 | 
             
            bundle exec rake install
         | 
| 27 30 | 
             
            ```
         | 
| 28 31 |  | 
| 29 | 
            -
            Ruby-minimap2 is tested on Ubuntu and macOS. 
         | 
| 32 | 
            +
            Ruby-minimap2 is [tested on Ubuntu and macOS](https://github.com/kojix2/ruby-minimap2/actions). 
         | 
| 30 33 |  | 
| 31 34 | 
             
            ## Quick Start
         | 
| 32 35 |  | 
| 33 36 | 
             
            ```ruby
         | 
| 34 37 | 
             
            require "minimap2"
         | 
| 38 | 
            +
            ```
         | 
| 35 39 |  | 
| 36 | 
            -
             | 
| 40 | 
            +
            Create aligner
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            ```ruby
         | 
| 37 43 | 
             
            aligner = Minimap2::Aligner.new("minimap2/test/MT-human.fa")
         | 
| 44 | 
            +
            ```
         | 
| 38 45 |  | 
| 39 | 
            -
             | 
| 46 | 
            +
            Retrieve a subsequence from the index
         | 
| 47 | 
            +
             | 
| 48 | 
            +
            ```ruby
         | 
| 40 49 | 
             
            seq = aligner.seq("MT_human", 100, 200)
         | 
| 50 | 
            +
            ```
         | 
| 41 51 |  | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 51 | 
            -
             | 
| 52 | 
            -
             | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 55 | 
            -
             | 
| 56 | 
            -
             | 
| 57 | 
            -
             | 
| 58 | 
            -
             | 
| 59 | 
            -
             | 
| 60 | 
            -
             | 
| 61 | 
            -
             | 
| 62 | 
            -
             | 
| 63 | 
            -
             | 
| 52 | 
            +
            Mapping
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            ```ruby
         | 
| 55 | 
            +
            hits = aligner.align(seq)
         | 
| 56 | 
            +
            pp hits[0]
         | 
| 57 | 
            +
            ```
         | 
| 58 | 
            +
             | 
| 59 | 
            +
            ```
         | 
| 60 | 
            +
            => 
         | 
| 61 | 
            +
            #<Minimap2::Alignment:0x000055fe18223f50
         | 
| 62 | 
            +
             @blen=100,
         | 
| 63 | 
            +
             @cigar=[[100, 0]],
         | 
| 64 | 
            +
             @cigar_str="100M",
         | 
| 65 | 
            +
             @cs="",
         | 
| 66 | 
            +
             @ctg="MT_human",
         | 
| 67 | 
            +
             @ctg_len=16569,
         | 
| 68 | 
            +
             @mapq=60,
         | 
| 69 | 
            +
             @md="",
         | 
| 70 | 
            +
             @mlen=100,
         | 
| 71 | 
            +
             @nm=0,
         | 
| 72 | 
            +
             @primary=1,
         | 
| 73 | 
            +
             @q_en=100,
         | 
| 74 | 
            +
             @q_st=0,
         | 
| 75 | 
            +
             @r_en=200,
         | 
| 76 | 
            +
             @r_st=100,
         | 
| 77 | 
            +
             @read_num=1,
         | 
| 78 | 
            +
             @strand=1,
         | 
| 79 | 
            +
             @trans_strand=0>
         | 
| 64 80 | 
             
            ```
         | 
| 65 81 |  | 
| 66 82 | 
             
            ## APIs Overview
         | 
| 67 83 |  | 
| 68 | 
            -
             | 
| 84 | 
            +
            API is based on [Mappy](https://github.com/lh3/minimap2/tree/master/python), the official Python binding for Minimap2. 
         | 
| 85 | 
            +
             | 
| 86 | 
            +
            Note: `Aligner#map` has been changed to `align`, because `map` means iterator in Ruby.
         | 
| 69 87 |  | 
| 70 88 | 
             
            ```markdown
         | 
| 71 89 | 
             
            * Minimap2 module
         | 
| 72 | 
            -
              - fastx_read
         | 
| 73 | 
            -
              - revcomp
         | 
| 90 | 
            +
              - fastx_read                  Read fasta/fastq file.
         | 
| 91 | 
            +
              - revcomp                     Reverse complement sequence.
         | 
| 74 92 |  | 
| 75 93 | 
             
              * Aligner class
         | 
| 76 94 | 
             
                * attributes
         | 
| 77 | 
            -
                  - index
         | 
| 78 | 
            -
                  - idx_opt
         | 
| 79 | 
            -
                  - map_opt
         | 
| 95 | 
            +
                  - index                   Returns the value of attribute index.
         | 
| 96 | 
            +
                  - idx_opt                 Returns the value of attribute idx_opt.
         | 
| 97 | 
            +
                  - map_opt                 Returns the value of attribute map_opt.
         | 
| 80 98 | 
             
                * methods
         | 
| 81 | 
            -
                  - new(path, preset: nil)
         | 
| 82 | 
            -
                  - align
         | 
| 99 | 
            +
                  - new(path, preset: nil)  Create a new aligner. (presets: sr, map-pb, map-ont, map-hifi, splice, asm5, etc.)
         | 
| 100 | 
            +
                  - align                   Maps and returns alignments.
         | 
| 101 | 
            +
                  - seq                     Retrieve a subsequence from the index.
         | 
| 83 102 |  | 
| 84 103 | 
             
              * Alignment class
         | 
| 85 104 | 
             
                * attributes
         | 
| 86 | 
            -
                  - ctg
         | 
| 87 | 
            -
                  - ctg_len
         | 
| 88 | 
            -
                  - r_st
         | 
| 89 | 
            -
                  - r_en
         | 
| 90 | 
            -
                  - strand
         | 
| 91 | 
            -
                  - trans_strand
         | 
| 92 | 
            -
                  - blen
         | 
| 93 | 
            -
                  - mlen
         | 
| 94 | 
            -
                  - nm
         | 
| 95 | 
            -
                  - primary
         | 
| 96 | 
            -
                  - q_st
         | 
| 97 | 
            -
                  - q_en
         | 
| 98 | 
            -
                  - mapq
         | 
| 99 | 
            -
                  - cigar
         | 
| 100 | 
            -
                  - read_num
         | 
| 101 | 
            -
                  - cs
         | 
| 102 | 
            -
                  - md
         | 
| 103 | 
            -
                  - cigar_str
         | 
| 105 | 
            +
                  - ctg                     Returns name of the reference sequence the query is mapped to.
         | 
| 106 | 
            +
                  - ctg_len                 Returns total length of the reference sequence.
         | 
| 107 | 
            +
                  - r_st                    Returns start positions on the reference.
         | 
| 108 | 
            +
                  - r_en                    Returns end positions on the reference.
         | 
| 109 | 
            +
                  - strand                  Returns +1 if on the forward strand; -1 if on the reverse strand.
         | 
| 110 | 
            +
                  - trans_strand            Returns transcript strand. +1 if on the forward strand; -1 if on the reverse strand; 0 if unknown.
         | 
| 111 | 
            +
                  - blen                    Returns length of the alignment, including both alignment matches and gaps but excluding ambiguous bases.
         | 
| 112 | 
            +
                  - mlen                    Returns length of the matching bases in the alignment, excluding ambiguous base matches.
         | 
| 113 | 
            +
                  - nm                      Returns number of mismatches, gaps and ambiguous positions in the alignment.
         | 
| 114 | 
            +
                  - primary                 Returns if the alignment is primary (typically the best and the first to generate).
         | 
| 115 | 
            +
                  - q_st                    Returns start positions on the query.
         | 
| 116 | 
            +
                  - q_en                    Returns end positions on the query.
         | 
| 117 | 
            +
                  - mapq                    Returns mapping quality.
         | 
| 118 | 
            +
                  - cigar                   Returns CIGAR as an array of shape (n_cigar,2). The two numbers give the length and the operator of each CIGAR operation.
         | 
| 119 | 
            +
                  - read_num                Returns read number that the alignment corresponds to; 1 for the first read and 2 for the second read.
         | 
| 120 | 
            +
                  - cs                      Returns the cs tag.
         | 
| 121 | 
            +
                  - md                      Returns the MD tag as in the SAM format. It is an empty string unless the md argument is applied when calling Aligner#align.
         | 
| 122 | 
            +
                  - cigar_str               Returns CIGAR string.
         | 
| 104 123 | 
             
                * methods
         | 
| 105 | 
            -
                  - to_h
         | 
| 106 | 
            -
                  - to_s
         | 
| 124 | 
            +
                  - to_h                    Convert Alignment to hash.
         | 
| 125 | 
            +
                  - to_s                    Convert to the PAF format without the QueryName and QueryLength columns.
         | 
| 107 126 |  | 
| 108 | 
            -
               | 
| 109 | 
            -
                * IdxOpt class
         | 
| 110 | 
            -
                * MapOpt class
         | 
| 127 | 
            +
              ## FFI module
         | 
| 128 | 
            +
                * IdxOpt class              Indexing options.
         | 
| 129 | 
            +
                * MapOpt class              Mapping options.
         | 
| 111 130 | 
             
            ```
         | 
| 112 131 |  | 
| 113 | 
            -
             | 
| 132 | 
            +
            This is not all. See the [RubyDoc.info documentation](https://rubydoc.info/gems/minimap2/) for more details.
         | 
| 114 133 |  | 
| 115 | 
            -
             | 
| 134 | 
            +
            ruby-minimap2 is built on top of [Ruby-FFI](https://github.com/ffi/ffi). 
         | 
| 135 | 
            +
            Native functions can be called from the FFI module. FFI also provides the way to access some C structs.
         | 
| 116 136 |  | 
| 117 | 
            -
            ruby | 
| 137 | 
            +
            ```ruby
         | 
| 138 | 
            +
            aligner.idx_opt.members
         | 
| 139 | 
            +
            # => [:k, :w, :flag, :bucket_bits, :mini_batch_size, :batch_size]
         | 
| 140 | 
            +
            aligner.idx_opt.values
         | 
| 141 | 
            +
            # => [15, 10, 0, 14, 50000000, 9223372036854775807]
         | 
| 142 | 
            +
            aligner.idx_opt[:k]
         | 
| 143 | 
            +
            # => 15
         | 
| 144 | 
            +
            aligner.idx_opt[:k] = 14
         | 
| 145 | 
            +
            aligner.idx_opt[:k]
         | 
| 146 | 
            +
            # => 14
         | 
| 147 | 
            +
            ```
         | 
| 118 148 |  | 
| 119 149 | 
             
            ## Development
         | 
| 120 150 |  | 
| 121 | 
            -
            Fork your repository | 
| 151 | 
            +
            Fork your repository.
         | 
| 152 | 
            +
            Then clone it.
         | 
| 122 153 |  | 
| 123 154 | 
             
            ```sh
         | 
| 124 | 
            -
            git clone -- | 
| 155 | 
            +
            git clone --recursive https://github.com/kojix2/ruby-minimap2
         | 
| 125 156 | 
             
            # git clone https://github.com/kojix2/ruby-minimap2
         | 
| 126 157 | 
             
            # cd ruby-minimap2
         | 
| 127 158 | 
             
            # git submodule update -i
         | 
| 128 159 | 
             
            ```
         | 
| 129 160 |  | 
| 130 | 
            -
            Build.
         | 
| 161 | 
            +
            Build Minimap2 and Mappy.
         | 
| 131 162 |  | 
| 132 163 | 
             
            ```sh
         | 
| 133 164 | 
             
            cd ruby-minimap2
         | 
| @@ -135,6 +166,13 @@ bundle install # Install dependent packages including Ruby-FFI | |
| 135 166 | 
             
            bundle exec rake minimap2:build
         | 
| 136 167 | 
             
            ```
         | 
| 137 168 |  | 
| 169 | 
            +
            A shared library will be created in the vendor directory.
         | 
| 170 | 
            +
             | 
| 171 | 
            +
            ```
         | 
| 172 | 
            +
            └── vendor
         | 
| 173 | 
            +
               └── libminimap2.so
         | 
| 174 | 
            +
            ```
         | 
| 175 | 
            +
             | 
| 138 176 | 
             
            Run tests.
         | 
| 139 177 |  | 
| 140 178 | 
             
            ```
         | 
| @@ -155,3 +193,7 @@ ruby-minimap2 is a library under development and there are many points to be imp | |
| 155 193 | 
             
            ## License
         | 
| 156 194 |  | 
| 157 195 | 
             
            [MIT License](https://opensource.org/licenses/MIT).
         | 
| 196 | 
            +
             | 
| 197 | 
            +
            ## Acknowledgements
         | 
| 198 | 
            +
             | 
| 199 | 
            +
            I would like to thank Heng Li for making Minimap2, and all the readers who read the README to the end.
         | 
    
        data/lib/minimap2.rb
    CHANGED
    
    | @@ -34,30 +34,34 @@ module Minimap2 | |
| 34 34 |  | 
| 35 35 | 
             
              # methods from mappy
         | 
| 36 36 | 
             
              class << self
         | 
| 37 | 
            -
                #  | 
| 37 | 
            +
                # Set verbosity level.
         | 
| 38 | 
            +
                # @param [Integer] level
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                def verbose(level = -1)
         | 
| 41 | 
            +
                  FFI.mm_verbose_level(level)
         | 
| 42 | 
            +
                end
         | 
| 43 | 
            +
             | 
| 44 | 
            +
                # Read fasta/fastq file.
         | 
| 38 45 | 
             
                # @param [String] file_path
         | 
| 39 | 
            -
                # @param [Boolean]  | 
| 46 | 
            +
                # @param [Boolean] comment If True, the comment will be read.
         | 
| 40 47 | 
             
                # @yield [name, seq, qual, comment]
         | 
| 41 | 
            -
                #  | 
| 48 | 
            +
                # @return [Enumerator] enum Returns an Enumerator if no block is given.
         | 
| 49 | 
            +
                # Note: You can use BioRuby instead of this method.
         | 
| 42 50 |  | 
| 43 | 
            -
                def fastx_read(file_path,  | 
| 51 | 
            +
                def fastx_read(file_path, comment: false, &block)
         | 
| 44 52 | 
             
                  path = File.expand_path(file_path)
         | 
| 45 53 | 
             
                  ks = FFI.mm_fastx_open(path)
         | 
| 46 | 
            -
                   | 
| 47 | 
            -
                     | 
| 48 | 
            -
             | 
| 49 | 
            -
                     | 
| 50 | 
            -
             | 
| 51 | 
            -
                      comment  | 
| 52 | 
            -
                      yield [name, seq, qual, comment]
         | 
| 53 | 
            -
                    else
         | 
| 54 | 
            -
                      yield [name, seq, qual]
         | 
| 54 | 
            +
                  if block_given?
         | 
| 55 | 
            +
                    fastx_each(ks, comment, &block)
         | 
| 56 | 
            +
                  else
         | 
| 57 | 
            +
                    Enumerator.new do |y|
         | 
| 58 | 
            +
                      # rewind does not work
         | 
| 59 | 
            +
                      fastx_each(ks, comment) { |r| y << r }
         | 
| 55 60 | 
             
                    end
         | 
| 56 61 | 
             
                  end
         | 
| 57 | 
            -
                  FFI.mm_fastx_close(ks)
         | 
| 58 62 | 
             
                end
         | 
| 59 63 |  | 
| 60 | 
            -
                #  | 
| 64 | 
            +
                # Reverse complement sequence.
         | 
| 61 65 | 
             
                # @param [String] seq
         | 
| 62 66 | 
             
                # @return [string] seq
         | 
| 63 67 |  | 
| @@ -68,11 +72,23 @@ module Minimap2 | |
| 68 72 | 
             
                  FFI.mappy_revcomp(l, bseq)
         | 
| 69 73 | 
             
                end
         | 
| 70 74 |  | 
| 71 | 
            -
                 | 
| 72 | 
            -
                # @param [Integer] level
         | 
| 75 | 
            +
                private
         | 
| 73 76 |  | 
| 74 | 
            -
                def  | 
| 75 | 
            -
                  FFI. | 
| 77 | 
            +
                def fastx_each(ks, comment)
         | 
| 78 | 
            +
                  yield fastx_next(ks, comment) while FFI.kseq_read(ks) >= 0
         | 
| 79 | 
            +
                  FFI.mm_fastx_close(ks)
         | 
| 80 | 
            +
                end
         | 
| 81 | 
            +
             | 
| 82 | 
            +
                def fastx_next(ks, read_comment)
         | 
| 83 | 
            +
                  qual = ks[:qual][:s] if (ks[:qual][:l]).positive?
         | 
| 84 | 
            +
                  name = ks[:name][:s]
         | 
| 85 | 
            +
                  seq  = ks[:seq][:s]
         | 
| 86 | 
            +
                  if read_comment
         | 
| 87 | 
            +
                    comment = ks[:comment][:s] if (ks[:comment][:l]).positive?
         | 
| 88 | 
            +
                    [name, seq, qual, comment]
         | 
| 89 | 
            +
                  else
         | 
| 90 | 
            +
                    [name, seq, qual]
         | 
| 91 | 
            +
                  end
         | 
| 76 92 | 
             
                end
         | 
| 77 93 | 
             
              end
         | 
| 78 94 | 
             
            end
         | 
    
        data/lib/minimap2/aligner.rb
    CHANGED
    
    | @@ -4,11 +4,21 @@ module Minimap2 | |
| 4 4 | 
             
              class Aligner
         | 
| 5 5 | 
             
                attr_reader :idx_opt, :map_opt, :index
         | 
| 6 6 |  | 
| 7 | 
            -
                # Create a new aligner
         | 
| 7 | 
            +
                # Create a new aligner.
         | 
| 8 8 | 
             
                #
         | 
| 9 9 | 
             
                # @param fn_idx_in [String] index or sequence file name.
         | 
| 10 10 | 
             
                # @param seq [String] a single sequence to index.
         | 
| 11 11 | 
             
                # @param preset [String] minimap2 preset.
         | 
| 12 | 
            +
                #   * map-pb : PacBio CLR genomic reads
         | 
| 13 | 
            +
                #   * map-ont : Oxford Nanopore genomic reads
         | 
| 14 | 
            +
                #   * map-hifi : PacBio HiFi/CCS genomic reads (v2.19 or later)
         | 
| 15 | 
            +
                #   * asm20 : PacBio HiFi/CCS genomic reads (v2.18 or earlier)
         | 
| 16 | 
            +
                #   * sr : short genomic paired-end reads
         | 
| 17 | 
            +
                #   * splice : spliced long reads (strand unknown)
         | 
| 18 | 
            +
                #   * splice:hq : Final PacBio Iso-seq or traditional cDNA
         | 
| 19 | 
            +
                #   * asm5 : intra-species asm-to-asm alignment
         | 
| 20 | 
            +
                #   * ava-pb : PacBio read overlap
         | 
| 21 | 
            +
                #   * ava-ont : Nanopore read overlap
         | 
| 12 22 | 
             
                # @param k [Integer] k-mer length, no larger than 28.
         | 
| 13 23 | 
             
                # @param w [Integer] minimizer window size, no larger than 255.
         | 
| 14 24 | 
             
                # @param min_cnt [Integer] minimum number of minimizers on a chain.
         | 
| @@ -80,7 +90,7 @@ module Minimap2 | |
| 80 90 | 
             
                  end
         | 
| 81 91 |  | 
| 82 92 | 
             
                  if fn_idx_in
         | 
| 83 | 
            -
                    warn  | 
| 93 | 
            +
                    warn 'Since fn_idx_in is specified, the seq argument will be ignored.' if seq
         | 
| 84 94 | 
             
                    reader = FFI.mm_idx_reader_open(fn_idx_in, idx_opt, fn_idx_out)
         | 
| 85 95 |  | 
| 86 96 | 
             
                    # The Ruby version raises an error here
         | 
| @@ -101,6 +111,7 @@ module Minimap2 | |
| 101 111 | 
             
                end
         | 
| 102 112 |  | 
| 103 113 | 
             
                # Explicitly releases the memory of the index object.
         | 
| 114 | 
            +
             | 
| 104 115 | 
             
                def free_index
         | 
| 105 116 | 
             
                  FFI.mm_idx_destroy(index) unless index.null?
         | 
| 106 117 | 
             
                end
         | 
| @@ -116,6 +127,7 @@ module Minimap2 | |
| 116 127 | 
             
                #   In the Ruby language, the name map means iterator.
         | 
| 117 128 | 
             
                #   The original name is map, but here I use the method name align.
         | 
| 118 129 | 
             
                # @note The use of Enumerator is being considered. The method names may change again.
         | 
| 130 | 
            +
                # @return [Array] alignments
         | 
| 119 131 |  | 
| 120 132 | 
             
                def align(
         | 
| 121 133 | 
             
                  seq, seq2 = nil,
         | 
| @@ -133,25 +145,29 @@ module Minimap2 | |
| 133 145 |  | 
| 134 146 | 
             
                  buf ||= FFI::TBuf.new
         | 
| 135 147 | 
             
                  km = FFI.mm_tbuf_get_km(buf)
         | 
| 136 | 
            -
                  n_regs_ptr = ::FFI::MemoryPointer.new :int
         | 
| 137 148 |  | 
| 138 | 
            -
                   | 
| 149 | 
            +
                  n_regs_ptr = ::FFI::MemoryPointer.new :int
         | 
| 150 | 
            +
                  regs_ptr = FFI.mm_map_aux(index, seq, seq2, n_regs_ptr, buf, map_opt)
         | 
| 139 151 | 
             
                  n_regs = n_regs_ptr.read_int
         | 
| 140 152 |  | 
| 141 | 
            -
                  regs = Array.new(n_regs)  | 
| 153 | 
            +
                  regs = Array.new(n_regs) do |i|
         | 
| 154 | 
            +
                    FFI::Reg1.new(regs_ptr + i * FFI::Reg1.size)
         | 
| 155 | 
            +
                  end
         | 
| 142 156 |  | 
| 143 157 | 
             
                  hit = FFI::Hit.new
         | 
| 158 | 
            +
             | 
| 144 159 | 
             
                  cs_str     = ::FFI::MemoryPointer.new(::FFI::MemoryPointer.new(:string))
         | 
| 145 160 | 
             
                  m_cs_str   = ::FFI::MemoryPointer.new :int
         | 
| 161 | 
            +
             | 
| 162 | 
            +
                  alignments = []
         | 
| 163 | 
            +
             | 
| 146 164 | 
             
                  i = 0
         | 
| 147 165 | 
             
                  begin
         | 
| 148 166 | 
             
                    while i < n_regs
         | 
| 149 167 | 
             
                      FFI.mm_reg2hitpy(index, regs[i], hit)
         | 
| 150 | 
            -
                      cigar = []
         | 
| 151 168 |  | 
| 152 169 | 
             
                      c = hit[:cigar32].read_array_of_uint32(hit[:n_cigar32])
         | 
| 153 | 
            -
                       | 
| 154 | 
            -
                      cigar = c.map { |x| [x >> 4, x & 0xf] }
         | 
| 170 | 
            +
                      cigar = c.map { |x| [x >> 4, x & 0xf] } # 32-bit CIGAR encoding -> Ruby array
         | 
| 155 171 |  | 
| 156 172 | 
             
                      _cs = ''
         | 
| 157 173 | 
             
                      if cs
         | 
| @@ -165,7 +181,7 @@ module Minimap2 | |
| 165 181 | 
             
                        _md = cs_str.read_pointer.read_string(l_cs_str)
         | 
| 166 182 | 
             
                      end
         | 
| 167 183 |  | 
| 168 | 
            -
                       | 
| 184 | 
            +
                      alignments << Alignment.new(hit, cigar, _cs, _md)
         | 
| 169 185 |  | 
| 170 186 | 
             
                      FFI.mm_free_reg1(regs[i])
         | 
| 171 187 | 
             
                      i += 1
         | 
| @@ -176,12 +192,13 @@ module Minimap2 | |
| 176 192 | 
             
                      i += 1
         | 
| 177 193 | 
             
                    end
         | 
| 178 194 | 
             
                  end
         | 
| 195 | 
            +
                  alignments
         | 
| 179 196 | 
             
                end
         | 
| 180 197 |  | 
| 181 | 
            -
                #  | 
| 182 | 
            -
                # @ | 
| 183 | 
            -
                # @ | 
| 184 | 
            -
                # @ | 
| 198 | 
            +
                # Retrieve a subsequence from the index.
         | 
| 199 | 
            +
                # @param name
         | 
| 200 | 
            +
                # @param start
         | 
| 201 | 
            +
                # @param stop
         | 
| 185 202 |  | 
| 186 203 | 
             
                def seq(name, start = 0, stop = 0x7fffffff)
         | 
| 187 204 | 
             
                  lp = ::FFI::MemoryPointer.new(:int)
         | 
| @@ -193,11 +210,13 @@ module Minimap2 | |
| 193 210 | 
             
                end
         | 
| 194 211 |  | 
| 195 212 | 
             
                # k-mer length, no larger than 28
         | 
| 213 | 
            +
             | 
| 196 214 | 
             
                def k
         | 
| 197 215 | 
             
                  index[:k]
         | 
| 198 216 | 
             
                end
         | 
| 199 217 |  | 
| 200 218 | 
             
                # minimizer window size, no larger than 255
         | 
| 219 | 
            +
             | 
| 201 220 | 
             
                def w
         | 
| 202 221 | 
             
                  index[:w]
         | 
| 203 222 | 
             
                end
         | 
    
        data/lib/minimap2/alignment.rb
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            module Minimap2
         | 
| 4 | 
            -
              # Alignment result
         | 
| 4 | 
            +
              # Alignment result.
         | 
| 5 5 | 
             
              #
         | 
| 6 6 | 
             
              # @!attribute ctg
         | 
| 7 7 | 
             
              #   @return [String] name of the reference sequence the query is mapped to.
         | 
| @@ -73,17 +73,21 @@ module Minimap2 | |
| 73 73 | 
             
                  @cs           = cs
         | 
| 74 74 | 
             
                  @md           = md
         | 
| 75 75 |  | 
| 76 | 
            -
                  @cigar_str = cigar.map { |x| x[0].to_s +  | 
| 76 | 
            +
                  @cigar_str = cigar.map { |x| x[0].to_s + FFI::CIGAR_STR[x[1]] }.join
         | 
| 77 77 | 
             
                end
         | 
| 78 78 |  | 
| 79 79 | 
             
                def primary?
         | 
| 80 80 | 
             
                  @primary == 1
         | 
| 81 81 | 
             
                end
         | 
| 82 82 |  | 
| 83 | 
            +
                # Convert Alignment to hash.
         | 
| 84 | 
            +
             | 
| 83 85 | 
             
                def to_h
         | 
| 84 86 | 
             
                  self.class.keys.map { |k| [k, __send__(k)] }.to_h
         | 
| 85 87 | 
             
                end
         | 
| 86 88 |  | 
| 89 | 
            +
                # Convert to the PAF format without the QueryName and QueryLength columns.
         | 
| 90 | 
            +
             | 
| 87 91 | 
             
                def to_s
         | 
| 88 92 | 
             
                  strand = if @strand.positive?
         | 
| 89 93 | 
             
                             '+'
         | 
| @@ -3,45 +3,60 @@ | |
| 3 3 | 
             
            module Minimap2
         | 
| 4 4 | 
             
              module FFI
         | 
| 5 5 | 
             
                # flags
         | 
| 6 | 
            -
                NO_DIAG | 
| 7 | 
            -
                NO_DUAL | 
| 8 | 
            -
                CIGAR | 
| 9 | 
            -
                OUT_SAM | 
| 10 | 
            -
                NO_QUAL | 
| 11 | 
            -
                OUT_CG | 
| 12 | 
            -
                OUT_CS | 
| 13 | 
            -
                SPLICE | 
| 14 | 
            -
                SPLICE_FOR | 
| 15 | 
            -
                SPLICE_REV | 
| 16 | 
            -
                NO_LJOIN | 
| 17 | 
            -
                OUT_CS_LONG | 
| 18 | 
            -
                SR | 
| 19 | 
            -
                FRAG_MODE | 
| 20 | 
            -
                NO_PRINT_2ND | 
| 21 | 
            -
                TWO_IO_THREADS | 
| 22 | 
            -
                LONG_CIGAR | 
| 23 | 
            -
                INDEPEND_SEG | 
| 24 | 
            -
                SPLICE_FLANK | 
| 25 | 
            -
                SOFTCLIP | 
| 26 | 
            -
                FOR_ONLY | 
| 27 | 
            -
                REV_ONLY | 
| 28 | 
            -
                HEAP_SORT | 
| 29 | 
            -
                ALL_CHAINS | 
| 30 | 
            -
                OUT_MD | 
| 31 | 
            -
                COPY_COMMENT | 
| 32 | 
            -
                EQX | 
| 33 | 
            -
                PAF_NO_HIT | 
| 34 | 
            -
                NO_END_FLT | 
| 35 | 
            -
                HARD_MLEVEL | 
| 36 | 
            -
                SAM_HIT_ONLY | 
| 37 | 
            -
             | 
| 38 | 
            -
                 | 
| 39 | 
            -
                 | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 42 | 
            -
                 | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 6 | 
            +
                NO_DIAG          = 0x001 # no exact diagonal hit
         | 
| 7 | 
            +
                NO_DUAL          = 0x002 # skip pairs where query name is lexicographically larger than target name
         | 
| 8 | 
            +
                CIGAR            = 0x004
         | 
| 9 | 
            +
                OUT_SAM          = 0x008
         | 
| 10 | 
            +
                NO_QUAL          = 0x010
         | 
| 11 | 
            +
                OUT_CG           = 0x020
         | 
| 12 | 
            +
                OUT_CS           = 0x040
         | 
| 13 | 
            +
                SPLICE           = 0x080 # splice mode
         | 
| 14 | 
            +
                SPLICE_FOR       = 0x100 # match GT-AG
         | 
| 15 | 
            +
                SPLICE_REV       = 0x200 # match CT-AC, the reverse complement of GT-AG
         | 
| 16 | 
            +
                NO_LJOIN         = 0x400
         | 
| 17 | 
            +
                OUT_CS_LONG      = 0x800
         | 
| 18 | 
            +
                SR               = 0x1000
         | 
| 19 | 
            +
                FRAG_MODE        = 0x2000
         | 
| 20 | 
            +
                NO_PRINT_2ND     = 0x4000
         | 
| 21 | 
            +
                TWO_IO_THREADS   = 0x8000 # Translator's Note. MM_F_2_IO_THREADS. Constants starting with numbers cannot be defined.
         | 
| 22 | 
            +
                LONG_CIGAR       = 0x10000
         | 
| 23 | 
            +
                INDEPEND_SEG     = 0x20000
         | 
| 24 | 
            +
                SPLICE_FLANK     = 0x40000
         | 
| 25 | 
            +
                SOFTCLIP         = 0x80000
         | 
| 26 | 
            +
                FOR_ONLY         = 0x100000
         | 
| 27 | 
            +
                REV_ONLY         = 0x200000
         | 
| 28 | 
            +
                HEAP_SORT        = 0x400000
         | 
| 29 | 
            +
                ALL_CHAINS       = 0x800000
         | 
| 30 | 
            +
                OUT_MD           = 0x1000000
         | 
| 31 | 
            +
                COPY_COMMENT     = 0x2000000
         | 
| 32 | 
            +
                EQX              = 0x4000000 # use =/X instead of M
         | 
| 33 | 
            +
                PAF_NO_HIT       = 0x8000000 # output unmapped reads to PAF
         | 
| 34 | 
            +
                NO_END_FLT       = 0x10000000
         | 
| 35 | 
            +
                HARD_MLEVEL      = 0x20000000
         | 
| 36 | 
            +
                SAM_HIT_ONLY     = 0x40000000
         | 
| 37 | 
            +
                RMQ              = 0x80000000  # LL
         | 
| 38 | 
            +
                QSTRAND          = 0x100000000 # LL
         | 
| 39 | 
            +
                NO_INV           = 0x200000000
         | 
| 40 | 
            +
             | 
| 41 | 
            +
                HPC              = 0x1
         | 
| 42 | 
            +
                NO_SEQ           = 0x2
         | 
| 43 | 
            +
                NO_NAME          = 0x4
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                IDX_MAGIC        = "MMI\2"
         | 
| 46 | 
            +
             | 
| 47 | 
            +
                MAX_SEG          = 255
         | 
| 48 | 
            +
             | 
| 49 | 
            +
                CIGAR_MATCH      = 0
         | 
| 50 | 
            +
                CIGAR_INS        = 1
         | 
| 51 | 
            +
                CIGAR_DEL        = 2
         | 
| 52 | 
            +
                CIGAR_N_SKIP     = 3
         | 
| 53 | 
            +
                CIGAR_SOFTCLIP   = 4
         | 
| 54 | 
            +
                CIGAR_HARDCLIP   = 5
         | 
| 55 | 
            +
                CIGAR_PADDING    = 6
         | 
| 56 | 
            +
                CIGAR_EQ_MATCH   = 7
         | 
| 57 | 
            +
                CIGAR_X_MISMATCH = 8
         | 
| 58 | 
            +
             | 
| 59 | 
            +
                CIGAR_STR        = 'MIDNSHP=XB'
         | 
| 45 60 |  | 
| 46 61 | 
             
                # emulate 128-bit integers
         | 
| 47 62 | 
             
                class MM128 < ::FFI::Struct
         | 
| @@ -77,6 +92,7 @@ module Minimap2 | |
| 77 92 | 
             
                    :sdust_thres,          :int,     # score threshold for SDUST; 0 to disable
         | 
| 78 93 | 
             
                    :max_qlen,             :int,     # max query length
         | 
| 79 94 | 
             
                    :bw,                   :int,     # bandwidth
         | 
| 95 | 
            +
                    :bw_long,              :int,
         | 
| 80 96 | 
             
                    :max_gap,              :int,     # break a chain if there are no minimizers in a max_gap window
         | 
| 81 97 | 
             
                    :max_gap_ref,          :int,
         | 
| 82 98 | 
             
                    :max_frag_len,         :int,
         | 
| @@ -85,14 +101,14 @@ module Minimap2 | |
| 85 101 | 
             
                    :min_cnt,              :int,     # min number of minimizers on each chain
         | 
| 86 102 | 
             
                    :min_chain_score,      :int,     # min chaining score
         | 
| 87 103 | 
             
                    :chain_gap_scale,      :float,
         | 
| 104 | 
            +
                    :rmq_size_cap,         :int,
         | 
| 105 | 
            +
                    :rmq_inner_dist,       :int,
         | 
| 106 | 
            +
                    :rmq_rescue_size,      :int,
         | 
| 107 | 
            +
                    :rmq_rescue_ratio,     :float,
         | 
| 88 108 | 
             
                    :mask_level,           :float,
         | 
| 89 109 | 
             
                    :mask_len,             :int,
         | 
| 90 110 | 
             
                    :pri_ratio,            :float,
         | 
| 91 111 | 
             
                    :best_n,               :int,     # top best_n chains are subjected to DP alignment
         | 
| 92 | 
            -
                    :max_join_long,        :int,
         | 
| 93 | 
            -
                    :max_join_short,       :int,
         | 
| 94 | 
            -
                    :min_join_flank_sc,    :int,
         | 
| 95 | 
            -
                    :min_join_flank_ratio, :float,
         | 
| 96 112 | 
             
                    :alt_drop,             :float,
         | 
| 97 113 | 
             
                    :a,                    :int,     # matching score
         | 
| 98 114 | 
             
                    :b,                    :int,     # mismatch
         | 
| @@ -111,6 +127,8 @@ module Minimap2 | |
| 111 127 | 
             
                    :anchor_ext_len,       :int,
         | 
| 112 128 | 
             
                    :anchor_ext_shift,     :int,
         | 
| 113 129 | 
             
                    :max_clip_ratio,       :float,   # drop an alignment if BOTH ends are clipped above this ratio
         | 
| 130 | 
            +
                    :rank_min_len,         :int,
         | 
| 131 | 
            +
                    :rank_frac,            :float,
         | 
| 114 132 | 
             
                    :pe_ori,               :int,
         | 
| 115 133 | 
             
                    :pe_bonus,             :int,
         | 
| 116 134 | 
             
                    :mid_occ_frac,         :float,   # only used by mm_mapopt_update(); see below
         | 
| @@ -119,6 +137,7 @@ module Minimap2 | |
| 119 137 | 
             
                    :max_occ,              :int32_t,
         | 
| 120 138 | 
             
                    :mini_batch_size,      :int64_t, # size of a batch of query bases to process in parallel
         | 
| 121 139 | 
             
                    :max_sw_mat,           :int64_t,
         | 
| 140 | 
            +
                    :cap_kalloc,           :int64_t,
         | 
| 122 141 | 
             
                    :split_prefix,         :string
         | 
| 123 142 | 
             
                end
         | 
| 124 143 |  | 
    
        data/lib/minimap2/version.rb
    CHANGED
    
    
| Binary file | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: minimap2
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0 | 
| 4 | 
            +
              version: 0.2.22.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - kojix2
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2021- | 
| 11 | 
            +
            date: 2021-08-08 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: ffi
         | 
| @@ -38,6 +38,20 @@ dependencies: | |
| 38 38 | 
             
                - - ">="
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 40 | 
             
                    version: '0'
         | 
| 41 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 42 | 
            +
              name: irb
         | 
| 43 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 | 
            +
                requirements:
         | 
| 45 | 
            +
                - - ">="
         | 
| 46 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 47 | 
            +
                    version: '0'
         | 
| 48 | 
            +
              type: :development
         | 
| 49 | 
            +
              prerelease: false
         | 
| 50 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 | 
            +
                requirements:
         | 
| 52 | 
            +
                - - ">="
         | 
| 53 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 54 | 
            +
                    version: '0'
         | 
| 41 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 56 | 
             
              name: minitest
         | 
| 43 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| @@ -112,6 +126,7 @@ files: | |
| 112 126 | 
             
            - lib/minimap2/ffi/mappy.rb
         | 
| 113 127 | 
             
            - lib/minimap2/ffi_helper.rb
         | 
| 114 128 | 
             
            - lib/minimap2/version.rb
         | 
| 129 | 
            +
            - vendor/libminimap2.so
         | 
| 115 130 | 
             
            homepage: https://github.com/kojix2/ruby-minimap2
         | 
| 116 131 | 
             
            licenses:
         | 
| 117 132 | 
             
            - MIT
         | 
| @@ -131,7 +146,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement | |
| 131 146 | 
             
                - !ruby/object:Gem::Version
         | 
| 132 147 | 
             
                  version: '0'
         | 
| 133 148 | 
             
            requirements: []
         | 
| 134 | 
            -
            rubygems_version: 3.2. | 
| 149 | 
            +
            rubygems_version: 3.2.22
         | 
| 135 150 | 
             
            signing_key: 
         | 
| 136 151 | 
             
            specification_version: 4
         | 
| 137 152 | 
             
            summary: minimap2
         |