clausewitz-spelling 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -1
- data/clausewitz-spelling.gemspec +1 -0
- data/lib/clausewitz/spelling/checker.rb +35 -24
- data/lib/clausewitz/spelling/version.rb +1 -1
- metadata +15 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 8fe17c11c32b6260c764e55e2bd4d508e37bdc2f
         | 
| 4 | 
            +
              data.tar.gz: 61083f1bc484d2a8b8dbf960b5226b947e17a520
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 42e7327da7a213edae016e730acb1177415453e3d87aa9771dda061b36f7fdfd01fe05966c8a35ef1fb4a650097025ad8de2c7c5f9ac6917361059d21ac02d62
         | 
| 7 | 
            +
              data.tar.gz: a7199d624932691fdf0827c34bd7bf1cda2ff112817f27026cec2efc1bb03b5973cb8eb5ca2728f4e11f233f5ab02601db6426a9a768c9b2c2e2cc60c6463f8d
         | 
    
        data/Gemfile.lock
    CHANGED
    
    | @@ -1,11 +1,12 @@ | |
| 1 1 | 
             
            PATH
         | 
| 2 2 | 
             
              remote: .
         | 
| 3 3 | 
             
              specs:
         | 
| 4 | 
            -
                clausewitz-spelling (0.1.16)
         | 
| 4 | 
            +
                clausewitz-spelling (0.1.17)
         | 
| 5 5 | 
             
                  colorize
         | 
| 6 6 | 
             
                  damerau-levenshtein
         | 
| 7 7 | 
             
                  ffi-aspell
         | 
| 8 8 | 
             
                  optimist
         | 
| 9 | 
            +
                  pragmatic_tokenizer
         | 
| 9 10 |  | 
| 10 11 | 
             
            GEM
         | 
| 11 12 | 
             
              remote: https://rubygems.org/
         | 
| @@ -19,6 +20,8 @@ GEM | |
| 19 20 | 
             
                  ffi
         | 
| 20 21 | 
             
                method_source (0.9.2)
         | 
| 21 22 | 
             
                optimist (3.0.0)
         | 
| 23 | 
            +
                pragmatic_tokenizer (3.0.7)
         | 
| 24 | 
            +
                  unicode
         | 
| 22 25 | 
             
                pry (0.12.2)
         | 
| 23 26 | 
             
                  coderay (~> 1.1.0)
         | 
| 24 27 | 
             
                  method_source (~> 0.9.0)
         | 
| @@ -36,6 +39,7 @@ GEM | |
| 36 39 | 
             
                  diff-lcs (>= 1.2.0, < 2.0)
         | 
| 37 40 | 
             
                  rspec-support (~> 3.8.0)
         | 
| 38 41 | 
             
                rspec-support (3.8.0)
         | 
| 42 | 
            +
                unicode (0.4.4.4)
         | 
| 39 43 |  | 
| 40 44 | 
             
            PLATFORMS
         | 
| 41 45 | 
             
              ruby
         | 
    
        data/clausewitz-spelling.gemspec
    CHANGED
    
        data/lib/clausewitz/spelling/checker.rb
    CHANGED
    
    
| @@ -5,6 +5,7 @@ require 'set' | |
| 5 5 | 
             
            require 'tmpdir'
         | 
| 6 6 | 
             
            require 'yaml'
         | 
| 7 7 | 
             
            require 'damerau-levenshtein'
         | 
| 8 | 
            +
            require 'pragmatic_tokenizer'
         | 
| 8 9 | 
             
            require 'clausewitz/localisation'
         | 
| 9 10 | 
             
            require 'clausewitz/spelling/results'
         | 
| 10 11 |  | 
| @@ -53,6 +54,7 @@ module Clausewitz; module Spelling | |
| 53 54 | 
             
                  aspell_checker = load_aspell_checker(lang)
         | 
| 54 55 | 
             
                  spellcheck_ignore = entries&.delete('spellcheck_ignore')
         | 
| 55 56 | 
             
                  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
         | 
| 57 | 
            +
                  ignored_keys << 'spellcheck_ignore'
         | 
| 56 58 | 
             
                  return IgnoredLangResult.new(lang) if ignored_keys.include?('all')
         | 
| 57 59 | 
             
                  return LangResults.new(lang, []) unless entries
         | 
| 58 60 | 
             
                  checks = entries.map do |key, entry|
         | 
| @@ -75,30 +77,39 @@ module Clausewitz; module Spelling | |
| 75 77 | 
             
                  # Remove other localisation bits we don't care about.
         | 
| 76 78 | 
             
                  entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
         | 
| 77 79 |  | 
| 78 | 
            -
                   | 
| 79 | 
            -
                   | 
| 80 | 
            -
                   | 
| 81 | 
            -
                  entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
         | 
| 82 | 
            -
             | 
| 83 | 
            -
                   | 
| 84 | 
            -
                   | 
| 85 | 
            -
                   | 
| 86 | 
            -
                   | 
| 87 | 
            -
                  words = entry.split(/\s|—/)
         | 
| 88 | 
            -
                  words.map! do |word|
         | 
| 89 | 
            -
             | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
             | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
             | 
| 100 | 
            -
             | 
| 101 | 
            -
                  end.join(" ")
         | 
| 80 | 
            +
                  ## We should also remove punctuation that is never part of words, like
         | 
| 81 | 
            +
                  ## exclamation points, commas, semi-colons, and question marks.
         | 
| 82 | 
            +
                  ## We should be using proper apostrophes for possessives in our loc.
         | 
| 83 | 
            +
                  #entry.gsub!(/(!|;|\?|"|“|”|…|:|\(|\))/, '')
         | 
| 84 | 
            +
             | 
| 85 | 
            +
                  ## If a word has one full stop at the end with no other full stops
         | 
| 86 | 
            +
                  ## elsewhere in the word, it's probably an acronym or initialism like
         | 
| 87 | 
            +
                  ## U.S.A. and so we should avoid stripping it. Otherwise, it's probably
         | 
| 88 | 
            +
                  ## the end of a sentence and can be ignored.
         | 
| 89 | 
            +
                  #words = entry.split(/\s|—/)
         | 
| 90 | 
            +
                  #words.map! do |word|
         | 
| 91 | 
            +
                  #  word.sub!(/^'/, '')
         | 
| 92 | 
            +
                  #  word.sub!(/'?,?'?$/, '')
         | 
| 93 | 
            +
             | 
| 94 | 
            +
                  #  if word.end_with?('...')
         | 
| 95 | 
            +
                  #    word.sub(/\.\.\.$/, '')
         | 
| 96 | 
            +
                  #  elsif word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
         | 
| 97 | 
            +
                  #    word.sub(/\.$/, '')
         | 
| 98 | 
            +
                  #  elsif word =~ /\d\.$/ && word.chars.count('.') <= 2
         | 
| 99 | 
            +
                  #    word.sub(/\.$/, '')
         | 
| 100 | 
            +
                  #  else
         | 
| 101 | 
            +
                  #    word
         | 
| 102 | 
            +
                  #  end
         | 
| 103 | 
            +
                  #end.join(" ")
         | 
| 104 | 
            +
             | 
| 105 | 
            +
                  opts = {
         | 
| 106 | 
            +
                    language: :en,
         | 
| 107 | 
            +
                    punctuation: :none,
         | 
| 108 | 
            +
                    downcase: false
         | 
| 109 | 
            +
                  }
         | 
| 110 | 
            +
                  words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
         | 
| 111 | 
            +
                  words = words.map { |word| word.split('—') }.flatten(1)
         | 
| 112 | 
            +
             | 
| 102 113 |  | 
| 103 114 | 
             
                  checks = words.map { |word| check_word(checker, wordlist, word) }.compact
         | 
| 104 115 | 
             
                  EntryResults.new(key, checks)
         | 
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: clausewitz-spelling
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.1.16
         | 
| 4 | 
            +
              version: 0.1.17
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Will Chappell
         | 
| @@ -122,6 +122,20 @@ dependencies: | |
| 122 122 | 
             
                - - ">="
         | 
| 123 123 | 
             
                  - !ruby/object:Gem::Version
         | 
| 124 124 | 
             
                    version: '0'
         | 
| 125 | 
            +
            - !ruby/object:Gem::Dependency
         | 
| 126 | 
            +
              name: pragmatic_tokenizer
         | 
| 127 | 
            +
              requirement: !ruby/object:Gem::Requirement
         | 
| 128 | 
            +
                requirements:
         | 
| 129 | 
            +
                - - ">="
         | 
| 130 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 131 | 
            +
                    version: '0'
         | 
| 132 | 
            +
              type: :runtime
         | 
| 133 | 
            +
              prerelease: false
         | 
| 134 | 
            +
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 135 | 
            +
                requirements:
         | 
| 136 | 
            +
                - - ">="
         | 
| 137 | 
            +
                  - !ruby/object:Gem::Version
         | 
| 138 | 
            +
                    version: '0'
         | 
| 125 139 | 
             
            description: 
         | 
| 126 140 | 
             
            email:
         | 
| 127 141 | 
             
            - wtchappell@gmail.com
         |