suika 0.1.2 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +1 -0
- data/.github/workflows/build.yml +21 -0
- data/.github/workflows/coverage.yml +26 -0
- data/.rubocop.yml +2 -1
- data/CHANGELOG.md +43 -3
- data/Gemfile +5 -1
- data/LICENSE.txt +1 -1
- data/README.md +33 -12
- data/Rakefile +75 -2
- data/Steepfile +20 -0
- data/dict/{ipadic.gz → sysdic.gz} +0 -0
- data/lib/suika.rb +1 -0
- data/lib/suika/char_def.rb +18 -14
- data/lib/suika/lattice.rb +8 -10
- data/lib/suika/node.rb +21 -0
- data/lib/suika/tagger.rb +67 -47
- data/lib/suika/version.rb +1 -1
- data/sig/suika.rbs +3 -0
- data/sig/suika/char_def.rbs +25 -0
- data/sig/suika/lattice.rbs +11 -0
- data/sig/suika/node.rbs +18 -0
- data/sig/suika/tagger.rbs +24 -0
- data/suika.gemspec +2 -3
- metadata +24 -15
- data/.travis.yml +0 -6
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA256:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 148e229070959a89197febf9bf9eabfdbe941f3c1cda66b75f87afb9371436c1
         | 
| 4 | 
            +
              data.tar.gz: c6d4fa8c654144ad39e19ff23d63161ea193bd8760eb654635d36666bed6f2dd
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: dbe9535910050678c51c3ff2d95118959d3b4cdeb4fffb2fb405ad5871258f527d1fa3f36df59706b3cf3d8c6265f19bd5844d778273fc369fc763eec293cf89
         | 
| 7 | 
            +
              data.tar.gz: 063ed20722d52ac97b4993093a60ad866fd744c8e74361b3934c1f96082523da510f8d7adcbb46e238aa03ba7dbf53539180215aa5475488294a77d92ea8633e
         | 
    
        data/.coveralls.yml
    ADDED
    
    | @@ -0,0 +1 @@ | |
| 1 | 
            +
            service_name: github-ci
         | 
| @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            name: build
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            on: [push, pull_request]
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            jobs:
         | 
| 6 | 
            +
              build:
         | 
| 7 | 
            +
                runs-on: ubuntu-latest
         | 
| 8 | 
            +
                strategy:
         | 
| 9 | 
            +
                  matrix:
         | 
| 10 | 
            +
                    ruby: [ '2.6', '2.7', '3.0' ]
         | 
| 11 | 
            +
                steps:
         | 
| 12 | 
            +
                  - uses: actions/checkout@v2
         | 
| 13 | 
            +
                  - name: Set up Ruby ${{ matrix.ruby }}
         | 
| 14 | 
            +
                    uses: actions/setup-ruby@v1
         | 
| 15 | 
            +
                    with:
         | 
| 16 | 
            +
                      ruby-version: ${{ matrix.ruby }}
         | 
| 17 | 
            +
                  - name: Build and test with Rake
         | 
| 18 | 
            +
                    run: |
         | 
| 19 | 
            +
                      gem install --no-document bundler
         | 
| 20 | 
            +
                      bundle install --jobs 4 --retry 3
         | 
| 21 | 
            +
                      bundle exec rake
         | 
| @@ -0,0 +1,26 @@ | |
| 1 | 
            +
            name: coverage
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            on:
         | 
| 4 | 
            +
              push:
         | 
| 5 | 
            +
                branches: [ main ]
         | 
| 6 | 
            +
              pull_request:
         | 
| 7 | 
            +
                branches: [ main ]
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            jobs:
         | 
| 10 | 
            +
              coverage:
         | 
| 11 | 
            +
                runs-on: ubuntu-latest
         | 
| 12 | 
            +
                steps:
         | 
| 13 | 
            +
                  - uses: actions/checkout@v2
         | 
| 14 | 
            +
                  - name: Set up Ruby 2.7
         | 
| 15 | 
            +
                    uses: actions/setup-ruby@v1
         | 
| 16 | 
            +
                    with:
         | 
| 17 | 
            +
                      ruby-version: '2.7'
         | 
| 18 | 
            +
                  - name: Build and test with Rake
         | 
| 19 | 
            +
                    run: |
         | 
| 20 | 
            +
                      gem install --no-document bundler
         | 
| 21 | 
            +
                      bundle install --jobs 4 --retry 3
         | 
| 22 | 
            +
                      bundle exec rake
         | 
| 23 | 
            +
                  - name: Coveralls GitHub Action
         | 
| 24 | 
            +
                    uses: coverallsapp/github-action@v1.1.2
         | 
| 25 | 
            +
                    with:
         | 
| 26 | 
            +
                      github-token: ${{ secrets.GITHUB_TOKEN }}
         | 
    
        data/.rubocop.yml
    CHANGED
    
    
    
        data/CHANGELOG.md
    CHANGED
    
    | @@ -1,8 +1,48 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            ## 0.3.1
         | 
| 2 | 
            +
            - Fix Tagger's inspect method not to expand instance variables for object creation on irb and pry.
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            ## 0.3.0
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            ### Changes
         | 
| 7 | 
            +
            - Add type declaration files.
         | 
| 8 | 
            +
            - Refactor to avoid assigning null to variables.
         | 
| 9 | 
            +
            - Fix some configuration files.
         | 
| 10 | 
            +
             | 
| 11 | 
            +
             | 
| 12 | 
            +
            ## 0.2.0
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            ### Breaking Change
         | 
| 15 | 
            +
            - Change to use dartsclone for trie library.
         | 
| 16 | 
            +
             | 
| 17 | 
            +
             | 
| 18 | 
            +
            ## 0.1.4
         | 
| 19 | 
            +
             | 
| 20 | 
            +
            ### Bug Fixes
         | 
| 21 | 
            +
            - Fix CharDef.char_type to return 'DEFAULT' when unknown character code is given.
         | 
| 22 | 
            +
             | 
| 23 | 
            +
            ### Features
         | 
| 24 | 
            +
            - Add character code of square era name Reiwa.
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            ## 0.1.3
         | 
| 27 | 
            +
             | 
| 28 | 
            +
            ### Bug Fixes
         | 
| 29 | 
            +
            - Fix unknown word processing.
         | 
| 30 | 
            +
             | 
| 31 | 
            +
            ### Changes
         | 
| 32 | 
            +
            - Remove redundant spaces from output.
         | 
| 33 | 
            +
             | 
| 34 | 
            +
             | 
| 35 | 
            +
            ## 0.1.2
         | 
| 36 | 
            +
             | 
| 37 | 
            +
            ### Bug Fixes
         | 
| 2 38 | 
             
            - Fix local variable typo in Tagger.parse.
         | 
| 3 39 |  | 
| 4 | 
            -
             | 
| 40 | 
            +
             | 
| 41 | 
            +
            ## 0.1.1
         | 
| 42 | 
            +
             | 
| 43 | 
            +
            ### Bug Fixes
         | 
| 5 44 | 
             
            - Fix specification of class in CharDef.char_type.
         | 
| 6 45 |  | 
| 7 | 
            -
             | 
| 46 | 
            +
             | 
| 47 | 
            +
            ## 0.1.0
         | 
| 8 48 | 
             
            - First release.
         | 
    
        data/Gemfile
    CHANGED
    
    | @@ -5,5 +5,9 @@ source 'https://rubygems.org' | |
| 5 5 | 
             
            # Specify your gem's dependencies in suika.gemspec
         | 
| 6 6 | 
             
            gemspec
         | 
| 7 7 |  | 
| 8 | 
            -
            gem 'rake', '~>  | 
| 8 | 
            +
            gem 'rake', '~> 13.0'
         | 
| 9 9 | 
             
            gem 'rspec', '~> 3.0'
         | 
| 10 | 
            +
            gem 'simplecov', '~> 0.21'
         | 
| 11 | 
            +
            gem 'simplecov-lcov', '~> 0.8'
         | 
| 12 | 
            +
            gem 'rbs', '~> 1.2'
         | 
| 13 | 
            +
            gem 'steep', '~> 0.44'
         | 
    
        data/LICENSE.txt
    CHANGED
    
    
    
        data/README.md
    CHANGED
    
    | @@ -1,7 +1,9 @@ | |
| 1 1 | 
             
            # Suika
         | 
| 2 2 |  | 
| 3 | 
            +
            [Build Status](https://github.com/yoshoku/suika/actions?query=workflow%3Abuild)
         | 
| 4 | 
            +
            [Coverage Status](https://coveralls.io/github/yoshoku/suika?branch=main)
         | 
| 3 5 | 
             
            [Gem Version](https://badge.fury.io/rb/suika)
         | 
| 4 | 
            -
            [](https://github.com/yoshoku/suika/blob/ | 
| 6 | 
            +
            [BSD 3-Clause License](https://github.com/yoshoku/suika/blob/main/LICENSE.txt)
         | 
| 5 7 | 
             
            [Documentation](https://rubydoc.info/gems/suika)
         | 
| 6 8 |  | 
| 7 9 | 
             
            Suika 🍉 is a Japanese morphological analyzer written in pure Ruby.
         | 
| @@ -30,13 +32,13 @@ require 'suika' | |
| 30 32 | 
             
            tagger = Suika::Tagger.new
         | 
| 31 33 | 
             
            tagger.parse('すもももももももものうち').each { |token| puts token }
         | 
| 32 34 |  | 
| 33 | 
            -
            # すもも   | 
| 34 | 
            -
            # も       | 
| 35 | 
            -
            # もも     | 
| 36 | 
            -
            # も       | 
| 37 | 
            -
            # もも     | 
| 38 | 
            -
            # の       | 
| 39 | 
            -
            # うち     | 
| 35 | 
            +
            # すもも  名詞,一般,*,*,*,*,すもも,スモモ,スモモ
         | 
| 36 | 
            +
            # も      助詞,係助詞,*,*,*,*,も,モ,モ
         | 
| 37 | 
            +
            # もも    名詞,一般,*,*,*,*,もも,モモ,モモ
         | 
| 38 | 
            +
            # も      助詞,係助詞,*,*,*,*,も,モ,モ
         | 
| 39 | 
            +
            # もも    名詞,一般,*,*,*,*,もも,モモ,モモ
         | 
| 40 | 
            +
            # の      助詞,連体化,*,*,*,*,の,ノ,ノ
         | 
| 41 | 
            +
            # うち    名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
         | 
| 40 42 | 
             
            ```
         | 
| 41 43 |  | 
| 42 44 | 
             
            Since the Tagger class loads the binary dictionary at initialization, it is recommended to reuse the instance.
         | 
| @@ -51,17 +53,36 @@ sentences.each do |sentence| | |
| 51 53 | 
             
            end
         | 
| 52 54 | 
             
            ```
         | 
| 53 55 |  | 
| 56 | 
            +
            ## Test
         | 
| 57 | 
            +
            Suika was able to parse all sentences in the [Livedoor news corpus](https://www.rondhuit.com/download.html#ldcc)
         | 
| 58 | 
            +
            without any error.
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            ```ruby
         | 
| 61 | 
            +
            require 'suika'
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            tagger = Suika::Tagger.new
         | 
| 64 | 
            +
             | 
| 65 | 
            +
            Dir.glob('ldcc-20140209/text/*/*.txt').each do |filename|
         | 
| 66 | 
            +
              File.foreach(filename) do |sentence|
         | 
| 67 | 
            +
                sentence.strip!
         | 
| 68 | 
            +
                puts tagger.parse(sentence) unless sentence.empty?
         | 
| 69 | 
            +
              end
         | 
| 70 | 
            +
            end
         | 
| 71 | 
            +
            ```
         | 
| 72 | 
            +
             | 
| 73 | 
            +
            
         | 
| 74 | 
            +
             | 
| 54 75 | 
             
            ## Contributing
         | 
| 55 76 |  | 
| 56 77 | 
             
            Bug reports and pull requests are welcome on GitHub at https://github.com/yoshoku/suika.
         | 
| 57 | 
            -
            This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/yoshoku/suika/blob/ | 
| 78 | 
            +
            This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/yoshoku/suika/blob/main/CODE_OF_CONDUCT.md).
         | 
| 58 79 |  | 
| 59 80 | 
             
            ## License
         | 
| 60 81 |  | 
| 61 82 | 
             
            The gem is available as open source under the terms of the [BSD-3-Clause License](https://opensource.org/licenses/BSD-3-Clause).
         | 
| 62 83 | 
             
            In addition, the gem includes binary data generated from mecab-ipadic.
         | 
| 63 | 
            -
            The details of the license can be found in [LICENSE.txt](https://github.com/yoshoku/suika/blob/ | 
| 64 | 
            -
            and [NOTICE.txt](https://github.com/yoshoku/suika/blob/ | 
| 84 | 
            +
            The details of the license can be found in [LICENSE.txt](https://github.com/yoshoku/suika/blob/main/LICENSE.txt)
         | 
| 85 | 
            +
            and [NOTICE.txt](https://github.com/yoshoku/suika/blob/main/NOTICE.txt).
         | 
| 65 86 |  | 
| 66 87 | 
             
            ## Respect
         | 
| 67 88 |  | 
| @@ -74,4 +95,4 @@ Janome, a morphological analyzer written in scripting language, gives me the cou | |
| 74 95 |  | 
| 75 96 | 
             
            ## Code of Conduct
         | 
| 76 97 |  | 
| 77 | 
            -
            Everyone interacting in the Suika project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/suika/blob/ | 
| 98 | 
            +
            Everyone interacting in the Suika project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/yoshoku/suika/blob/main/CODE_OF_CONDUCT.md).
         | 
    
        data/Rakefile
    CHANGED
    
    | @@ -1,6 +1,79 @@ | |
| 1 | 
            -
            require  | 
| 2 | 
            -
            require  | 
| 1 | 
            +
            require 'bundler/gem_tasks'
         | 
| 2 | 
            +
            require 'rspec/core/rake_task'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'csv'
         | 
| 5 | 
            +
            require 'dartsclone'
         | 
| 6 | 
            +
            require 'nkf'
         | 
| 7 | 
            +
            require 'rubygems/package'
         | 
| 8 | 
            +
            require 'zlib'
         | 
| 3 9 |  | 
| 4 10 | 
             
            RSpec::Core::RakeTask.new(:spec)
         | 
| 5 11 |  | 
| 6 12 | 
             
            task :default => :spec
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            desc 'Build suika system dictionary'
         | 
| 15 | 
            +
            task :dictionary do
         | 
| 16 | 
            +
              base_dir = "#{__dir__}/dict/mecab-ipadic-2.7.0-20070801"
         | 
| 17 | 
            +
              unless File.directory?(base_dir)
         | 
| 18 | 
            +
                puts "Download mecab-ipadic file and expand that under dict directory:  #{__dir__}/dict/mecab-ipadic-2.7.0-20070801"
         | 
| 19 | 
            +
                puts
         | 
| 20 | 
            +
                puts 'Example:'
         | 
| 21 | 
            +
                puts 'wget -O dict/mecab-ipadic.tgz https://drive.google.com/uc?export=download&id=0B4y35FiV1wh7MWVlSDBCSXZMTXM'
         | 
| 22 | 
            +
                puts 'cd dict'
         | 
| 23 | 
            +
                puts 'tar xzf mecab-ipadic.tgz'
         | 
| 24 | 
            +
                puts 'cd ../'
         | 
| 25 | 
            +
                next # exit
         | 
| 26 | 
            +
              end
         | 
| 27 | 
            +
             | 
| 28 | 
            +
              File.open("#{__dir__}/dict/mecab-ipadic-2.7.0-20070801/Reiwa.csv", 'w') do |f|
         | 
| 29 | 
            +
                f.puts('令和,1288,1288,5904,名詞,固有名詞,一般,*,*,*,令和,レイワ,レイワ')
         | 
| 30 | 
            +
              end
         | 
| 31 | 
            +
             | 
| 32 | 
            +
              unknowns = {}
         | 
| 33 | 
            +
              File.open("#{base_dir}/unk.def") do |f|
         | 
| 34 | 
            +
                f.each_line do |line|
         | 
| 35 | 
            +
                  row = NKF.nkf('-w', line.chomp).split(',')
         | 
| 36 | 
            +
                  unknowns[row[0]] ||= []
         | 
| 37 | 
            +
                  unknowns[row[0]] << [row[1].to_i, row[2].to_i, row[3].to_i, *row[4..-1]]
         | 
| 38 | 
            +
                end
         | 
| 39 | 
            +
              end
         | 
| 40 | 
            +
             | 
| 41 | 
            +
              dict = {}
         | 
| 42 | 
            +
              Dir.glob("#{base_dir}/*.csv").each do |filename|
         | 
| 43 | 
            +
                File.open(filename) do |f|
         | 
| 44 | 
            +
                  f.each_line do |line|
         | 
| 45 | 
            +
                    row = NKF.nkf('-w', line.chomp).split(',')
         | 
| 46 | 
            +
                    dict[row[0]] ||= []
         | 
| 47 | 
            +
                    dict[row[0]] << [row[1].to_i, row[2].to_i, row[3].to_i, *row[4..-1]]
         | 
| 48 | 
            +
                  end
         | 
| 49 | 
            +
                end
         | 
| 50 | 
            +
              end
         | 
| 51 | 
            +
             | 
| 52 | 
            +
              da = DartsClone::DoubleArray.new
         | 
| 53 | 
            +
              words = dict.keys.sort
         | 
| 54 | 
            +
              da.build(words)
         | 
| 55 | 
            +
              features = words.map { |w| dict[w] }
         | 
| 56 | 
            +
             | 
| 57 | 
            +
              concosts = nil
         | 
| 58 | 
            +
              File.open("#{base_dir}/matrix.def") do |f|
         | 
| 59 | 
            +
                n_entries = f.readline.chomp.split.map(&:to_i).first
         | 
| 60 | 
            +
                concosts = Array.new(n_entries) { Array.new(n_entries) }
         | 
| 61 | 
            +
                f.each_line do |line|
         | 
| 62 | 
            +
                  row, col, cost = line.chomp.split.map(&:to_i)
         | 
| 63 | 
            +
                  concosts[row][col] = cost
         | 
| 64 | 
            +
                end
         | 
| 65 | 
            +
              end
         | 
| 66 | 
            +
             | 
| 67 | 
            +
              ipadic = {
         | 
| 68 | 
            +
                trie: da.get_array,
         | 
| 69 | 
            +
                features: features,
         | 
| 70 | 
            +
                unknowns: unknowns,
         | 
| 71 | 
            +
                concosts: concosts
         | 
| 72 | 
            +
              }
         | 
| 73 | 
            +
             | 
| 74 | 
            +
              Zlib::GzipWriter.open("#{__dir__}/dict/sysdic.gz", Zlib::BEST_SPEED) { |f| f.write(Marshal.dump(ipadic)) }
         | 
| 75 | 
            +
             | 
| 76 | 
            +
              puts 'The system dictionary has been successfully built:'
         | 
| 77 | 
            +
              puts "#{__dir__}/dict/sysdic.gz"
         | 
| 78 | 
            +
              puts Digest::SHA1.file("#{__dir__}/dict/sysdic.gz").to_s
         | 
| 79 | 
            +
            end
         | 
    
        data/Steepfile
    ADDED
    
    | @@ -0,0 +1,20 @@ | |
| 1 | 
            +
            target :lib do
         | 
| 2 | 
            +
              signature "sig"
         | 
| 3 | 
            +
            #
         | 
| 4 | 
            +
              check "lib"                       # Directory name
         | 
| 5 | 
            +
            #   check "Gemfile"                   # File name
         | 
| 6 | 
            +
            #   check "app/models/**/*.rb"        # Glob
         | 
| 7 | 
            +
            #   # ignore "lib/templates/*.rb"
         | 
| 8 | 
            +
            #
         | 
| 9 | 
            +
            #   # library "pathname", "set"       # Standard libraries
         | 
| 10 | 
            +
              library "dartsclone"           # Gems
         | 
| 11 | 
            +
            end
         | 
| 12 | 
            +
             | 
| 13 | 
            +
            # target :spec do
         | 
| 14 | 
            +
            #   signature "sig", "sig-private"
         | 
| 15 | 
            +
            #
         | 
| 16 | 
            +
            #   check "spec"
         | 
| 17 | 
            +
            #
         | 
| 18 | 
            +
            #   # library "pathname", "set"       # Standard libraries
         | 
| 19 | 
            +
            #   # library "rspec"
         | 
| 20 | 
            +
            # end
         | 
| Binary file | 
    
        data/lib/suika.rb
    CHANGED
    
    
    
        data/lib/suika/char_def.rb
    CHANGED
    
    | @@ -5,10 +5,11 @@ module Suika | |
| 5 5 | 
             
              class CharDef
         | 
| 6 6 | 
             
                # @!visibility private
         | 
| 7 7 | 
             
                def self.char_type(ch)
         | 
| 8 | 
            -
                   | 
| 9 | 
            -
                  CHAR_TYPES.find do | | 
| 10 | 
            -
                    Object.const_get("::Suika::CharDef::#{ | 
| 8 | 
            +
                  ch_code = ch.unpack1('U*')
         | 
| 9 | 
            +
                  ch_type = CHAR_TYPES.find do |ct|
         | 
| 10 | 
            +
                    Object.const_get("::Suika::CharDef::#{ct}").any? { |r| r.include?(ch_code) }
         | 
| 11 11 | 
             
                  end
         | 
| 12 | 
            +
                  ch_type || 'DEFAULT'
         | 
| 12 13 | 
             
                end
         | 
| 13 14 |  | 
| 14 15 | 
             
                # @!visibility private
         | 
| @@ -16,39 +17,41 @@ module Suika | |
| 16 17 | 
             
                  CHAR_CATEGORY[char_type(ch)]
         | 
| 17 18 | 
             
                end
         | 
| 18 19 |  | 
| 20 | 
            +
                MAX_GROUPING_SIZE = 24
         | 
| 21 | 
            +
             | 
| 19 22 | 
             
                CHAR_CATEGORY = {
         | 
| 20 23 | 
             
                  'DEFAULT' => {
         | 
| 21 | 
            -
                    invoke:  | 
| 24 | 
            +
                    invoke: false, group: true, length: 0
         | 
| 22 25 | 
             
                  },
         | 
| 23 26 | 
             
                  'SPACE' => {
         | 
| 24 | 
            -
                    invoke:  | 
| 27 | 
            +
                    invoke: false, group: true, length: 0
         | 
| 25 28 | 
             
                  },
         | 
| 26 29 | 
             
                  'KANJI' => {
         | 
| 27 | 
            -
                    invoke:  | 
| 30 | 
            +
                    invoke: false, group: false, length: 2
         | 
| 28 31 | 
             
                  },
         | 
| 29 32 | 
             
                  'SYMBOL' => {
         | 
| 30 | 
            -
                    invoke:  | 
| 33 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 31 34 | 
             
                  },
         | 
| 32 35 | 
             
                  'NUMERIC' => {
         | 
| 33 | 
            -
                    invoke:  | 
| 36 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 34 37 | 
             
                  },
         | 
| 35 38 | 
             
                  'ALPHA' => {
         | 
| 36 | 
            -
                    invoke:  | 
| 39 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 37 40 | 
             
                  },
         | 
| 38 41 | 
             
                  'HIRAGANA' => {
         | 
| 39 | 
            -
                    invoke:  | 
| 42 | 
            +
                    invoke: false, group: true, length: 2
         | 
| 40 43 | 
             
                  },
         | 
| 41 44 | 
             
                  'KATAKANA' => {
         | 
| 42 | 
            -
                    invoke:  | 
| 45 | 
            +
                    invoke: true, group: true, length: 2
         | 
| 43 46 | 
             
                  },
         | 
| 44 47 | 
             
                  'KANJINUMERIC' => {
         | 
| 45 | 
            -
                    invoke:  | 
| 48 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 46 49 | 
             
                  },
         | 
| 47 50 | 
             
                  'GREEK' => {
         | 
| 48 | 
            -
                    invoke:  | 
| 51 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 49 52 | 
             
                  },
         | 
| 50 53 | 
             
                  'CYRILLIC' => {
         | 
| 51 | 
            -
                    invoke:  | 
| 54 | 
            +
                    invoke: true, group: true, length: 0
         | 
| 52 55 | 
             
                  }
         | 
| 53 56 | 
             
                }.freeze
         | 
| 54 57 |  | 
| @@ -117,6 +120,7 @@ module Suika | |
| 117 120 | 
             
                  0x2B00..0x2BFF,  # Miscellaneous Symbols and Arrows
         | 
| 118 121 | 
             
                  0x2A00..0x2AFF,  # Supplemental Mathematical Operators
         | 
| 119 122 | 
             
                  0x3300..0x33FF,
         | 
| 123 | 
            +
                  0x32FF..0x32FF,  # Square era name REIWA
         | 
| 120 124 | 
             
                  0x3200..0x32FE,  # Enclosed CJK Letters and Months
         | 
| 121 125 | 
             
                  0x3000..0x303F,  # CJK Symbol and Punctuation
         | 
| 122 126 | 
             
                  0xFE30..0xFE4F,  # CJK Compatibility Forms
         | 
    
        data/lib/suika/lattice.rb
    CHANGED
    
    | @@ -4,8 +4,6 @@ module Suika | |
| 4 4 | 
             
              # @!visibility private
         | 
| 5 5 | 
             
              class Lattice
         | 
| 6 6 | 
             
                # @!visibility private
         | 
| 7 | 
            -
                Node = Struct.new(:surface, :min_cost, :min_prev, :left_id, :right_id, :cost, :attrs, keyword_init: true)
         | 
| 8 | 
            -
             | 
| 9 7 | 
             
                attr_reader :begin_nodes, :end_nodes, :length
         | 
| 10 8 |  | 
| 11 9 | 
             
                # @!visibility private
         | 
| @@ -13,17 +11,17 @@ module Suika | |
| 13 11 | 
             
                  @length = length
         | 
| 14 12 | 
             
                  @begin_nodes = Array.new(length + 1) { [] }
         | 
| 15 13 | 
             
                  @end_nodes = Array.new(length + 1) { [] }
         | 
| 16 | 
            -
                  bos = Node.new(surface: 'BOS', left_id: 0, right_id: 0, cost: 0, attrs: [])
         | 
| 17 | 
            -
                  @end_nodes[0]. | 
| 18 | 
            -
                  eos = Node.new(surface: 'EOS', left_id: 0, right_id: 0, cost: 0, attrs: [])
         | 
| 19 | 
            -
                  @begin_nodes[length]. | 
| 14 | 
            +
                  bos = Node.new(surface: 'BOS', unknown: false, left_id: 0, right_id: 0, cost: 0, attrs: [])
         | 
| 15 | 
            +
                  @end_nodes[0].push(bos)
         | 
| 16 | 
            +
                  eos = Node.new(surface: 'EOS', unknown: false, left_id: 0, right_id: 0, cost: 0, attrs: [])
         | 
| 17 | 
            +
                  @begin_nodes[length].push(eos)
         | 
| 20 18 | 
             
                end
         | 
| 21 19 |  | 
| 22 20 | 
             
                # @!visibility private
         | 
| 23 | 
            -
                def insert(begin_id, end_id, surface, left_id, right_id, cost, attrs)
         | 
| 24 | 
            -
                  node = Node.new(surface: surface, left_id: left_id, right_id: right_id, cost: cost, attrs: attrs)
         | 
| 25 | 
            -
                  @begin_nodes[begin_id]. | 
| 26 | 
            -
                  @end_nodes[end_id]. | 
| 21 | 
            +
                def insert(begin_id, end_id, surface, unknown, left_id, right_id, cost, attrs)
         | 
| 22 | 
            +
                  node = Node.new(surface: surface, unknown: unknown, left_id: left_id, right_id: right_id, cost: cost, attrs: attrs)
         | 
| 23 | 
            +
                  @begin_nodes[begin_id].push(node)
         | 
| 24 | 
            +
                  @end_nodes[end_id].push(node)
         | 
| 27 25 | 
             
                end
         | 
| 28 26 | 
             
              end
         | 
| 29 27 | 
             
            end
         | 
    
        data/lib/suika/node.rb
    ADDED
    
    | @@ -0,0 +1,21 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            module Suika
         | 
| 4 | 
            +
              # @!visibility private
         | 
| 5 | 
            +
              class Node
         | 
| 6 | 
            +
                # @!visibility private
         | 
| 7 | 
            +
                attr_accessor :surface, :unknown, :min_cost, :min_prev, :left_id, :right_id, :cost, :attrs
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                # @!visibility private
         | 
| 10 | 
            +
                def initialize(surface: '', unknown: false, min_cost: 0, min_prev: nil, left_id: 0, right_id: 0, cost: 0, attrs: [])
         | 
| 11 | 
            +
                  @surface = surface
         | 
| 12 | 
            +
                  @unknown = unknown
         | 
| 13 | 
            +
                  @min_cost = min_cost
         | 
| 14 | 
            +
                  @min_prev = min_prev
         | 
| 15 | 
            +
                  @left_id = left_id
         | 
| 16 | 
            +
                  @right_id = right_id
         | 
| 17 | 
            +
                  @cost = cost
         | 
| 18 | 
            +
                  @attrs = attrs
         | 
| 19 | 
            +
                end
         | 
| 20 | 
            +
              end
         | 
| 21 | 
            +
            end
         | 
    
        data/lib/suika/tagger.rb
    CHANGED
    
    | @@ -1,6 +1,7 @@ | |
| 1 1 | 
             
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 | 
            -
            require ' | 
| 3 | 
            +
            require 'dartsclone'
         | 
| 4 | 
            +
            require 'rubygems/package'
         | 
| 4 5 | 
             
            require 'zlib'
         | 
| 5 6 |  | 
| 6 7 | 
             
            module Suika
         | 
| @@ -12,22 +13,22 @@ module Suika | |
| 12 13 | 
             
              #   tagger = Suika::Tagger.new
         | 
| 13 14 | 
             
              #   tagger.parse('すもももももももものうち').each { |token| puts token }
         | 
| 14 15 | 
             
              #
         | 
| 15 | 
            -
              #   # すもも   | 
| 16 | 
            -
              #   # も       | 
| 17 | 
            -
              #   # もも     | 
| 18 | 
            -
              #   # も       | 
| 19 | 
            -
              #   # もも     | 
| 20 | 
            -
              #   # の       | 
| 21 | 
            -
              #   # うち     | 
| 16 | 
            +
              #   # すもも  名詞,一般,*,*,*,*,すもも,スモモ,スモモ
         | 
| 17 | 
            +
              #   # も      助詞,係助詞,*,*,*,*,も,モ,モ
         | 
| 18 | 
            +
              #   # もも    名詞,一般,*,*,*,*,もも,モモ,モモ
         | 
| 19 | 
            +
              #   # も      助詞,係助詞,*,*,*,*,も,モ,モ
         | 
| 20 | 
            +
              #   # もも    名詞,一般,*,*,*,*,もも,モモ,モモ
         | 
| 21 | 
            +
              #   # の      助詞,連体化,*,*,*,*,の,ノ,ノ
         | 
| 22 | 
            +
              #   # うち    名詞,非自立,副詞可能,*,*,*,うち,ウチ,ウチ
         | 
| 22 23 | 
             
              #
         | 
| 23 24 | 
             
              class Tagger
         | 
| 24 25 | 
             
                # Create a new tagger by loading the built-in binary dictionary.
         | 
| 25 26 | 
             
                def initialize
         | 
| 26 | 
            -
                   | 
| 27 | 
            -
             | 
| 28 | 
            -
                  @ | 
| 29 | 
            -
                  @ | 
| 30 | 
            -
                  @ | 
| 27 | 
            +
                  raise IOError, 'SHA1 digest of dictionary file does not match.' unless DICTIONARY_KEY == Digest::SHA1.file(DICTIONARY_PATH).to_s
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                  @sysdic = Marshal.load(Zlib::GzipReader.open(DICTIONARY_PATH, &:read))
         | 
| 30 | 
            +
                  @trie = DartsClone::DoubleArray.new
         | 
| 31 | 
            +
                  @trie.set_array(@sysdic[:trie])
         | 
| 31 32 | 
             
                end
         | 
| 32 33 |  | 
| 33 34 | 
             
                # Parse the given sentence.
         | 
| @@ -39,57 +40,75 @@ module Suika | |
| 39 40 | 
             
                  terminal = sentence.length
         | 
| 40 41 |  | 
| 41 42 | 
             
                  while start < terminal
         | 
| 42 | 
            -
                     | 
| 43 | 
            -
             | 
| 44 | 
            -
                     | 
| 45 | 
            -
                     | 
| 46 | 
            -
             | 
| 47 | 
            -
             | 
| 48 | 
            -
             | 
| 49 | 
            -
             | 
| 50 | 
            -
             | 
| 43 | 
            +
                    step = terminal - start
         | 
| 44 | 
            +
             | 
| 45 | 
            +
                    query = sentence[start..-1] || ''
         | 
| 46 | 
            +
                    result = trie.common_prefix_search(query)
         | 
| 47 | 
            +
                    unless result.empty?
         | 
| 48 | 
            +
                      words, indices = result
         | 
| 49 | 
            +
                      unless words.empty?
         | 
| 50 | 
            +
                        step = INT_MAX
         | 
| 51 | 
            +
                        words.each_with_index do |word, i|
         | 
| 52 | 
            +
                          features[indices[i]].each do |el|
         | 
| 53 | 
            +
                            lattice.insert(start, start + word.length, word, false, el[0].to_i, el[1].to_i, el[2].to_i, el[3..-1])
         | 
| 54 | 
            +
                          end
         | 
| 55 | 
            +
                          step = word.length if word.length < step
         | 
| 51 56 | 
             
                        end
         | 
| 52 | 
            -
                        is_unknown = false
         | 
| 53 57 | 
             
                      end
         | 
| 54 | 
            -
                      pos += 1
         | 
| 55 | 
            -
                      word = sentence[start..pos]
         | 
| 56 | 
            -
                    end
         | 
| 57 | 
            -
             | 
| 58 | 
            -
                    unless is_unknown
         | 
| 59 | 
            -
                      start += 1
         | 
| 60 | 
            -
                      next
         | 
| 61 58 | 
             
                    end
         | 
| 62 59 |  | 
| 63 | 
            -
                    word = sentence[start]
         | 
| 64 | 
            -
                     | 
| 65 | 
            -
                     | 
| 66 | 
            -
                    if char_cate[: | 
| 67 | 
            -
                      unk_terminal = char_cate[: | 
| 60 | 
            +
                    word = sentence[start] || ''
         | 
| 61 | 
            +
                    char_cate = CharDef.char_category(sentence[start] || '')
         | 
| 62 | 
            +
                    char_type = CharDef.char_type(sentence[start] || '')
         | 
| 63 | 
            +
                    if char_cate[:invoke]
         | 
| 64 | 
            +
                      unk_terminal = start + (char_cate[:group] ? CharDef::MAX_GROUPING_SIZE : char_cate[:length])
         | 
| 65 | 
            +
                      unk_terminal = terminal if terminal < unk_terminal
         | 
| 68 66 | 
             
                      pos = start + 1
         | 
| 69 | 
            -
                      while pos < unk_terminal && char_type == CharDef.char_type(sentence[pos])
         | 
| 70 | 
            -
                        word << sentence[pos]
         | 
| 67 | 
            +
                      while pos < unk_terminal && char_type == CharDef.char_type(sentence[pos] || '')
         | 
| 68 | 
            +
                        word << (sentence[pos] || '')
         | 
| 71 69 | 
             
                        pos += 1
         | 
| 72 70 | 
             
                      end
         | 
| 73 71 | 
             
                    end
         | 
| 74 | 
            -
                     | 
| 75 | 
            -
                      lattice.insert(start, start + word.length,
         | 
| 76 | 
            -
                                      | 
| 77 | 
            -
                                     el[3..-1])
         | 
| 72 | 
            +
                    unknowns[char_type].each do |el|
         | 
| 73 | 
            +
                      lattice.insert(start, start + word.length, word, true,
         | 
| 74 | 
            +
                                     el[0].to_i, el[1].to_i, el[2].to_i, el[3..-1])
         | 
| 78 75 | 
             
                    end
         | 
| 79 | 
            -
                     | 
| 76 | 
            +
                    step = word.length if word.length < step
         | 
| 77 | 
            +
             | 
| 78 | 
            +
                    start += step
         | 
| 80 79 | 
             
                  end
         | 
| 81 80 |  | 
| 82 81 | 
             
                  viterbi(lattice)
         | 
| 83 82 | 
             
                end
         | 
| 84 83 |  | 
| 84 | 
            +
                def inspect
         | 
| 85 | 
            +
                  to_s
         | 
| 86 | 
            +
                end
         | 
| 87 | 
            +
             | 
| 85 88 | 
             
                private
         | 
| 86 89 |  | 
| 90 | 
            +
                DICTIONARY_PATH = "#{__dir__}/../../dict/sysdic.gz"
         | 
| 91 | 
            +
                DICTIONARY_KEY = 'eb921bf5e67f5733188527b21adbf9dabdda0c7a'
         | 
| 87 92 | 
             
                INT_MAX = 2**(([42].pack('i').size * 16) - 2) - 1
         | 
| 88 93 |  | 
| 89 | 
            -
                private_constant :INT_MAX
         | 
| 94 | 
            +
                private_constant :DICTIONARY_PATH, :DICTIONARY_KEY, :INT_MAX
         | 
| 95 | 
            +
             | 
| 96 | 
            +
                attr_reader :trie
         | 
| 97 | 
            +
             | 
| 98 | 
            +
                def features
         | 
| 99 | 
            +
                  @sysdic[:features]
         | 
| 100 | 
            +
                end
         | 
| 101 | 
            +
             | 
| 102 | 
            +
                def unknowns
         | 
| 103 | 
            +
                  @sysdic[:unknowns]
         | 
| 104 | 
            +
                end
         | 
| 105 | 
            +
             | 
| 106 | 
            +
                def connect_cost(r_id, l_id)
         | 
| 107 | 
            +
                  @sysdic[:concosts][r_id][l_id]
         | 
| 108 | 
            +
                end
         | 
| 90 109 |  | 
| 91 110 | 
             
                def viterbi(lattice)
         | 
| 92 | 
            -
                  bos = lattice.end_nodes[0] | 
| 111 | 
            +
                  bos = lattice.end_nodes[0][0]
         | 
| 93 112 | 
             
                  bos.min_cost = 0
         | 
| 94 113 | 
             
                  bos.min_prev = nil
         | 
| 95 114 |  | 
| @@ -98,7 +117,7 @@ module Suika | |
| 98 117 | 
             
                      rnode.min_cost = INT_MAX
         | 
| 99 118 | 
             
                      rnode.min_prev = nil
         | 
| 100 119 | 
             
                      lattice.end_nodes[n].each do |lnode|
         | 
| 101 | 
            -
                        cost = lnode.min_cost +  | 
| 120 | 
            +
                        cost = lnode.min_cost + connect_cost(lnode.right_id, rnode.left_id) + rnode.cost
         | 
| 102 121 | 
             
                        if cost < rnode.min_cost
         | 
| 103 122 | 
             
                          rnode.min_cost = cost
         | 
| 104 123 | 
             
                          rnode.min_prev = lnode
         | 
| @@ -107,13 +126,14 @@ module Suika | |
| 107 126 | 
             
                    end
         | 
| 108 127 | 
             
                  end
         | 
| 109 128 |  | 
| 110 | 
            -
                  eos = lattice.begin_nodes[-1] | 
| 129 | 
            +
                  eos = lattice.begin_nodes[-1][0]
         | 
| 111 130 | 
             
                  prev_node = eos.min_prev
         | 
| 112 131 | 
             
                  res = []
         | 
| 113 132 | 
             
                  until prev_node.nil?
         | 
| 114 | 
            -
                    res. | 
| 133 | 
            +
                    res.push("#{prev_node.surface}\t#{prev_node.attrs.join(',')}") if prev_node.surface != 'BOS' && prev_node.surface != 'EOS'
         | 
| 115 134 | 
             
                    prev_node = prev_node.min_prev
         | 
| 116 135 | 
             
                  end
         | 
| 136 | 
            +
             | 
| 117 137 | 
             
                  res.reverse
         | 
| 118 138 | 
             
                end
         | 
| 119 139 | 
             
              end
         | 
    
        data/lib/suika/version.rb
    CHANGED
    
    
    
        data/sig/suika.rbs
    ADDED
    
    
| @@ -0,0 +1,25 @@ | |
| 1 | 
            +
            module Suika
         | 
| 2 | 
            +
              class CharDef
         | 
| 3 | 
            +
                def self.char_type: (String ch) -> String
         | 
| 4 | 
            +
                def self.char_category: (String ch) -> { invoke: bool, group: bool, length: Integer }
         | 
| 5 | 
            +
             | 
| 6 | 
            +
                MAX_GROUPING_SIZE: Integer
         | 
| 7 | 
            +
             | 
| 8 | 
            +
                private
         | 
| 9 | 
            +
             | 
| 10 | 
            +
                #CHAR_CATEGORY: Hash[String, { invoke: bool, group: bool, length: Integer }]
         | 
| 11 | 
            +
                CHAR_CATEGORY: Hash[String, untyped]
         | 
| 12 | 
            +
                CHAR_TYPES: Array[String]
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                SPACE: Array[Range[Integer]]
         | 
| 15 | 
            +
                NUMERIC: Array[Range[Integer]]
         | 
| 16 | 
            +
                SYMBOL: Array[Range[Integer]]
         | 
| 17 | 
            +
                ALPHA: Array[Range[Integer]]
         | 
| 18 | 
            +
                CYRILLIC: Array[Range[Integer]]
         | 
| 19 | 
            +
                GREEK: Array[Range[Integer]]
         | 
| 20 | 
            +
                HIRAGANA: Array[Range[Integer]]
         | 
| 21 | 
            +
                KATAKANA: Array[Range[Integer]]
         | 
| 22 | 
            +
                KANJI: Array[Range[Integer]]
         | 
| 23 | 
            +
                KANJINUMERIC: Array[Range[Integer]]
         | 
| 24 | 
            +
              end
         | 
| 25 | 
            +
            end
         | 
| @@ -0,0 +1,11 @@ | |
| 1 | 
            +
            module Suika
         | 
| 2 | 
            +
              class Lattice
         | 
| 3 | 
            +
                attr_reader begin_nodes: Array[Array[::Suika::Node]]
         | 
| 4 | 
            +
                attr_reader end_nodes: Array[Array[::Suika::Node]]
         | 
| 5 | 
            +
                attr_reader length: Integer
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                def initialize: (Integer length) -> void
         | 
| 8 | 
            +
                def insert: (Integer begin_id, Integer end_id, String surface, bool unknown,
         | 
| 9 | 
            +
                             Integer left_id, Integer right_id, Integer cost, Array[String] attrs) -> void
         | 
| 10 | 
            +
              end
         | 
| 11 | 
            +
            end
         | 
    
        data/sig/suika/node.rbs
    ADDED
    
    | @@ -0,0 +1,18 @@ | |
| 1 | 
            +
            module Suika
         | 
| 2 | 
            +
              class Node
         | 
| 3 | 
            +
                attr_accessor surface: String
         | 
| 4 | 
            +
                attr_accessor unknown: bool
         | 
| 5 | 
            +
                attr_accessor min_cost: Integer
         | 
| 6 | 
            +
                # attr_accessor min_prev: ::Suika::Node?
         | 
| 7 | 
            +
                attr_accessor min_prev: untyped
         | 
| 8 | 
            +
                attr_accessor left_id: Integer
         | 
| 9 | 
            +
                attr_accessor right_id: Integer
         | 
| 10 | 
            +
                attr_accessor cost: Integer
         | 
| 11 | 
            +
                attr_accessor attrs: Array[String]
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                def initialize: (?surface: String surface, ?unknown: bool unknown,
         | 
| 14 | 
            +
                                 ?min_cost: Integer min_cost, ?min_prev: ::Suika::Node? min_prev,
         | 
| 15 | 
            +
                                 ?left_id: ::Integer left_id, ?right_id: ::Integer right_id,
         | 
| 16 | 
            +
                                 ?cost: ::Integer cost, ?attrs: Array[String] attrs) -> void
         | 
| 17 | 
            +
              end
         | 
| 18 | 
            +
            end
         | 
| @@ -0,0 +1,24 @@ | |
| 1 | 
            +
            module Suika
         | 
| 2 | 
            +
              class Tagger
         | 
| 3 | 
            +
                def initialize: () -> void
         | 
| 4 | 
            +
                def parse: (String sentence) -> Array[String]
         | 
| 5 | 
            +
                def inspect: () -> String
         | 
| 6 | 
            +
             | 
| 7 | 
            +
                private
         | 
| 8 | 
            +
             | 
| 9 | 
            +
                DICTIONARY_PATH: String
         | 
| 10 | 
            +
                DICTIONARY_KEY: String
         | 
| 11 | 
            +
                INT_MAX: untyped
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                attr_reader trie: ::DartsClone::DoubleArray
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                # type feature = [Integer, Integer, Integer, String, String, String, String, String, String, String]
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                # def features: () -> Array[Array[feature]]
         | 
| 18 | 
            +
                def features: () -> Array[Array[untyped]]
         | 
| 19 | 
            +
                # def unknowns: () -> Hash[String, Array[feature]]
         | 
| 20 | 
            +
                def unknowns: () -> Hash[String, Array[untyped]]
         | 
| 21 | 
            +
                def connect_cost: (Integer r_id, Integer l_id) -> Integer
         | 
| 22 | 
            +
                def viterbi: (::Suika::Lattice lattice) -> Array[String]
         | 
| 23 | 
            +
              end
         | 
| 24 | 
            +
            end
         | 
    
        data/suika.gemspec
    CHANGED
    
    | @@ -12,11 +12,10 @@ Gem::Specification.new do |spec| | |
| 12 12 | 
             
              spec.description   = 'Suika is a Japanese morphological analyzer written in pure Ruby.'
         | 
| 13 13 | 
             
              spec.homepage      = 'https://github.com/yoshoku/suika'
         | 
| 14 14 | 
             
              spec.license       = 'BSD-3-Clause'
         | 
| 15 | 
            -
              spec.required_ruby_version = Gem::Requirement.new('>= 2.3.0')
         | 
| 16 15 |  | 
| 17 16 | 
             
              spec.metadata['homepage_uri'] = spec.homepage
         | 
| 18 17 | 
             
              spec.metadata['source_code_uri'] = spec.homepage
         | 
| 19 | 
            -
              spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/ | 
| 18 | 
            +
              spec.metadata['changelog_uri'] = 'https://github.com/yoshoku/suika/blob/main/CHANGELOG.md'
         | 
| 20 19 | 
             
              spec.metadata['documentation_uri'] = 'https://rubydoc.info/gems/suika'
         | 
| 21 20 |  | 
| 22 21 | 
             
              # Specify which files should be added to the gem when it is released.
         | 
| @@ -28,5 +27,5 @@ Gem::Specification.new do |spec| | |
| 28 27 | 
             
              spec.executables   = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
         | 
| 29 28 | 
             
              spec.require_paths = ['lib']
         | 
| 30 29 |  | 
| 31 | 
            -
              spec.add_runtime_dependency ' | 
| 30 | 
            +
              spec.add_runtime_dependency 'dartsclone', '>= 0.2.0'
         | 
| 32 31 | 
             
            end
         | 
    
        metadata
    CHANGED
    
    | @@ -1,29 +1,29 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: suika
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1 | 
| 4 | 
            +
              version: 0.3.1
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - yoshoku
         | 
| 8 | 
            -
            autorequire:
         | 
| 8 | 
            +
            autorequire: 
         | 
| 9 9 | 
             
            bindir: exe
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date:  | 
| 11 | 
            +
            date: 2021-07-03 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 | 
            -
              name:  | 
| 14 | 
            +
              name: dartsclone
         | 
| 15 15 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 16 | 
             
                requirements:
         | 
| 17 | 
            -
                - - " | 
| 17 | 
            +
                - - ">="
         | 
| 18 18 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 | 
            -
                    version:  | 
| 19 | 
            +
                    version: 0.2.0
         | 
| 20 20 | 
             
              type: :runtime
         | 
| 21 21 | 
             
              prerelease: false
         | 
| 22 22 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 23 | 
             
                requirements:
         | 
| 24 | 
            -
                - - " | 
| 24 | 
            +
                - - ">="
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 | 
            -
                    version:  | 
| 26 | 
            +
                    version: 0.2.0
         | 
| 27 27 | 
             
            description: Suika is a Japanese morphological analyzer written in pure Ruby.
         | 
| 28 28 | 
             
            email:
         | 
| 29 29 | 
             
            - yoshoku@outlook.com
         | 
| @@ -31,10 +31,12 @@ executables: [] | |
| 31 31 | 
             
            extensions: []
         | 
| 32 32 | 
             
            extra_rdoc_files: []
         | 
| 33 33 | 
             
            files:
         | 
| 34 | 
            +
            - ".coveralls.yml"
         | 
| 35 | 
            +
            - ".github/workflows/build.yml"
         | 
| 36 | 
            +
            - ".github/workflows/coverage.yml"
         | 
| 34 37 | 
             
            - ".gitignore"
         | 
| 35 38 | 
             
            - ".rspec"
         | 
| 36 39 | 
             
            - ".rubocop.yml"
         | 
| 37 | 
            -
            - ".travis.yml"
         | 
| 38 40 | 
             
            - CHANGELOG.md
         | 
| 39 41 | 
             
            - CODE_OF_CONDUCT.md
         | 
| 40 42 | 
             
            - Gemfile
         | 
| @@ -42,14 +44,21 @@ files: | |
| 42 44 | 
             
            - NOTICE.txt
         | 
| 43 45 | 
             
            - README.md
         | 
| 44 46 | 
             
            - Rakefile
         | 
| 47 | 
            +
            - Steepfile
         | 
| 45 48 | 
             
            - bin/console
         | 
| 46 49 | 
             
            - bin/setup
         | 
| 47 | 
            -
            - dict/ | 
| 50 | 
            +
            - dict/sysdic.gz
         | 
| 48 51 | 
             
            - lib/suika.rb
         | 
| 49 52 | 
             
            - lib/suika/char_def.rb
         | 
| 50 53 | 
             
            - lib/suika/lattice.rb
         | 
| 54 | 
            +
            - lib/suika/node.rb
         | 
| 51 55 | 
             
            - lib/suika/tagger.rb
         | 
| 52 56 | 
             
            - lib/suika/version.rb
         | 
| 57 | 
            +
            - sig/suika.rbs
         | 
| 58 | 
            +
            - sig/suika/char_def.rbs
         | 
| 59 | 
            +
            - sig/suika/lattice.rbs
         | 
| 60 | 
            +
            - sig/suika/node.rbs
         | 
| 61 | 
            +
            - sig/suika/tagger.rbs
         | 
| 53 62 | 
             
            - suika.gemspec
         | 
| 54 63 | 
             
            homepage: https://github.com/yoshoku/suika
         | 
| 55 64 | 
             
            licenses:
         | 
| @@ -57,9 +66,9 @@ licenses: | |
| 57 66 | 
             
            metadata:
         | 
| 58 67 | 
             
              homepage_uri: https://github.com/yoshoku/suika
         | 
| 59 68 | 
             
              source_code_uri: https://github.com/yoshoku/suika
         | 
| 60 | 
            -
              changelog_uri: https://github.com/yoshoku/ | 
| 69 | 
            +
              changelog_uri: https://github.com/yoshoku/suika/blob/main/CHANGELOG.md
         | 
| 61 70 | 
             
              documentation_uri: https://rubydoc.info/gems/suika
         | 
| 62 | 
            -
            post_install_message:
         | 
| 71 | 
            +
            post_install_message: 
         | 
| 63 72 | 
             
            rdoc_options: []
         | 
| 64 73 | 
             
            require_paths:
         | 
| 65 74 | 
             
            - lib
         | 
| @@ -67,15 +76,15 @@ required_ruby_version: !ruby/object:Gem::Requirement | |
| 67 76 | 
             
              requirements:
         | 
| 68 77 | 
             
              - - ">="
         | 
| 69 78 | 
             
                - !ruby/object:Gem::Version
         | 
| 70 | 
            -
                  version:  | 
| 79 | 
            +
                  version: '0'
         | 
| 71 80 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 72 81 | 
             
              requirements:
         | 
| 73 82 | 
             
              - - ">="
         | 
| 74 83 | 
             
                - !ruby/object:Gem::Version
         | 
| 75 84 | 
             
                  version: '0'
         | 
| 76 85 | 
             
            requirements: []
         | 
| 77 | 
            -
            rubygems_version: 3. | 
| 78 | 
            -
            signing_key:
         | 
| 86 | 
            +
            rubygems_version: 3.2.21
         | 
| 87 | 
            +
            signing_key: 
         | 
| 79 88 | 
             
            specification_version: 4
         | 
| 80 89 | 
             
            summary: Suika is a Japanese morphological analyzer written in pure Ruby.
         | 
| 81 90 | 
             
            test_files: []
         |