clausewitz-spelling 0.1.17 → 0.1.18
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/clausewitz-spellcheck +5 -0
- data/lib/clausewitz/localisation.rb +52 -19
- data/lib/clausewitz/spelling/checker.rb +66 -38
- data/lib/clausewitz/spelling/version.rb +1 -1
- metadata +1 -1
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 45dfa3f0e350c7b4b3ba9d1a6f69c970cb884134
         | 
| 4 | 
            +
              data.tar.gz: c4ffb8579ab8787115d447b11fa49cb4a3159569
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 4661cea315ffec778213bf3b67ea6322973d19ce5a6302a23b2f8afef4eb1231583df251b7b098274bfd7b32b9d3f40cf01bca6cc65efd87bc3bd8581cd73e69
         | 
| 7 | 
            +
              data.tar.gz: 44c9608f76e094a80d7ffd4924641ab65ede46b46a10a99daad2b70a4f7c9d0934df1f6b243a84deb23a71a9e21b2734e6205012f42d8c173e5cda9883bc6f97
         | 
    
        data/Gemfile.lock
    CHANGED
    
    
    
        data/exe/clausewitz-spellcheck
    CHANGED
    
    | @@ -15,6 +15,11 @@ class Main | |
| 15 15 | 
             
                  opt :suggestion_count,
         | 
| 16 16 | 
             
                    "How many suggestions to display",
         | 
| 17 17 | 
             
                    type: :int
         | 
| 18 | 
            +
                  Clausewitz::Localisation::LANG_MAP.each do |_, config|
         | 
| 19 | 
            +
                    opt "#{config.name}_dialect".to_sym,
         | 
| 20 | 
            +
                      "Select dialect for #{config.name.capitalize}",
         | 
| 21 | 
            +
                      type: :string
         | 
| 22 | 
            +
                  end
         | 
| 18 23 | 
             
                end
         | 
| 19 24 | 
             
                [opts, args]
         | 
| 20 25 | 
             
              end
         | 
| @@ -2,25 +2,58 @@ require 'yaml' | |
| 2 2 |  | 
| 3 3 | 
             
            module Clausewitz
         | 
| 4 4 | 
             
              module Localisation
         | 
| 5 | 
            +
                class LangConfig
         | 
| 6 | 
            +
                  attr_reader :name, :base, :dialects, :default_dialect
         | 
| 7 | 
            +
                  def initialize(name, base, dialects, default_dialect = nil)
         | 
| 8 | 
            +
                    @name             = name
         | 
| 9 | 
            +
                    @base             = base
         | 
| 10 | 
            +
                    @dialects         = dialects
         | 
| 11 | 
            +
                    @default_dialect  = default_dialect
         | 
| 12 | 
            +
                    @selected_dialect = @default_dialect
         | 
| 13 | 
            +
                  end
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                  def clausewitz_name
         | 
| 16 | 
            +
                    "l_#{@name}"
         | 
| 17 | 
            +
                  end
         | 
| 18 | 
            +
             | 
| 19 | 
            +
                  def select_dialect(dialect)
         | 
| 20 | 
            +
                    if @dialects.include?(dialect)
         | 
| 21 | 
            +
                      @selected_dialect = dialect
         | 
| 22 | 
            +
                    else
         | 
| 23 | 
            +
                      fail("Unknown dialect override '#{dialect}'!")
         | 
| 24 | 
            +
                    end
         | 
| 25 | 
            +
                  end
         | 
| 26 | 
            +
             | 
| 27 | 
            +
                  def full_name
         | 
| 28 | 
            +
                    @selected_dialect ? "#{@base}_#{@selected_dialect.upcase}" : @base
         | 
| 29 | 
            +
                  end
         | 
| 30 | 
            +
                end
         | 
| 31 | 
            +
             | 
| 5 32 | 
             
                LANG_MAP = {
         | 
| 6 | 
            -
                  ' | 
| 7 | 
            -
                     | 
| 8 | 
            -
             | 
| 9 | 
            -
                   | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 14 | 
            -
                   | 
| 15 | 
            -
             | 
| 16 | 
            -
                     | 
| 17 | 
            -
                   | 
| 18 | 
            -
                  ' | 
| 19 | 
            -
                     | 
| 20 | 
            -
             | 
| 21 | 
            -
                   | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 33 | 
            +
                  'l_english' => LangConfig.new(
         | 
| 34 | 
            +
                    'english',
         | 
| 35 | 
            +
                    'en', %w[gb us ca], 'gb'
         | 
| 36 | 
            +
                  ),
         | 
| 37 | 
            +
                  'l_french' => LangConfig.new(
         | 
| 38 | 
            +
                    'french',
         | 
| 39 | 
            +
                    'fr', %w[fr ca], 'fr'
         | 
| 40 | 
            +
                  ),
         | 
| 41 | 
            +
                  'l_german' => LangConfig.new(
         | 
| 42 | 
            +
                    'german',
         | 
| 43 | 
            +
                    'de', %w[de], 'de'
         | 
| 44 | 
            +
                  ),
         | 
| 45 | 
            +
                  'l_portuguese' => LangConfig.new(
         | 
| 46 | 
            +
                    'portuguese',
         | 
| 47 | 
            +
                    'pt', %w[pt br], 'pt'
         | 
| 48 | 
            +
                  ),
         | 
| 49 | 
            +
                  'l_spanish' => LangConfig.new(
         | 
| 50 | 
            +
                    'spanish',
         | 
| 51 | 
            +
                    'es', []
         | 
| 52 | 
            +
                  ),
         | 
| 53 | 
            +
                  'l_russian' => LangConfig.new(
         | 
| 54 | 
            +
                    'russian',
         | 
| 55 | 
            +
                    'ru', []
         | 
| 56 | 
            +
                  )
         | 
| 24 57 | 
             
                }
         | 
| 25 58 |  | 
| 26 59 | 
             
                def self.parse(text)
         | 
| @@ -56,7 +89,7 @@ module Clausewitz | |
| 56 89 | 
             
                  end
         | 
| 57 90 | 
             
                end
         | 
| 58 91 |  | 
| 59 | 
            -
                VALID_LANG_REGEX =  | 
| 92 | 
            +
                VALID_LANG_REGEX = /(#{LANG_MAP.keys.join('|')})/
         | 
| 60 93 | 
             
                def self.valid_lang?(lang)
         | 
| 61 94 | 
             
                  lang =~ VALID_LANG_REGEX
         | 
| 62 95 | 
             
                end
         | 
| @@ -20,6 +20,16 @@ module Clausewitz; module Spelling | |
| 20 20 | 
             
                    @dictionary_root = Pathname.new(@dictionary_root)
         | 
| 21 21 | 
             
                  end
         | 
| 22 22 |  | 
| 23 | 
            +
                  dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
         | 
| 24 | 
            +
                  dialect_opts.each do |dialect_opt_key|
         | 
| 25 | 
            +
                    next unless opts[dialect_opt_key]
         | 
| 26 | 
            +
                    next if dialect_opt_key.to_s.end_with?('given')
         | 
| 27 | 
            +
                    language_name = dialect_opt_key[/(.+)_dialect/, 1]
         | 
| 28 | 
            +
                    config = language_config(language_name)
         | 
| 29 | 
            +
                    dialect = opts[dialect_opt_key]
         | 
| 30 | 
            +
                    config.select_dialect(dialect.downcase)
         | 
| 31 | 
            +
                  end
         | 
| 32 | 
            +
             | 
| 23 33 | 
             
                  @loaded_spellcheckers = {}
         | 
| 24 34 | 
             
                  @loaded_wordlists     = {}
         | 
| 25 35 | 
             
                end
         | 
| @@ -41,22 +51,25 @@ module Clausewitz; module Spelling | |
| 41 51 | 
             
                    return UnparseableFileResult.new(filepath, e)
         | 
| 42 52 | 
             
                  end
         | 
| 43 53 |  | 
| 44 | 
            -
                  checks = contents.map do | | 
| 45 | 
            -
                     | 
| 54 | 
            +
                  checks = contents.map do |lang_name, entries|
         | 
| 55 | 
            +
                    lc = language_config(lang_name)
         | 
| 56 | 
            +
                    check_entries(lc, entries)
         | 
| 46 57 | 
             
                  end
         | 
| 47 58 | 
             
                  FileResults.new(filepath, checks)
         | 
| 48 59 | 
             
                end
         | 
| 49 60 |  | 
| 50 61 | 
             
                private
         | 
| 51 62 |  | 
| 52 | 
            -
                def check_entries( | 
| 53 | 
            -
                  wordlist = load_wordlist( | 
| 54 | 
            -
                  aspell_checker = load_aspell_checker( | 
| 63 | 
            +
                def check_entries(lc, entries)
         | 
| 64 | 
            +
                  wordlist = load_wordlist(lc)
         | 
| 65 | 
            +
                  aspell_checker = load_aspell_checker(lc)
         | 
| 55 66 | 
             
                  spellcheck_ignore = entries&.delete('spellcheck_ignore')
         | 
| 56 67 | 
             
                  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
         | 
| 57 68 | 
             
                  ignored_keys << 'spellcheck_ignore'
         | 
| 58 | 
            -
                   | 
| 59 | 
            -
             | 
| 69 | 
            +
                  if ignored_keys.include?('all')
         | 
| 70 | 
            +
                    return IgnoredLangResult.new(lc.clausewitz_name)
         | 
| 71 | 
            +
                  end
         | 
| 72 | 
            +
                  return LangResults.new(lc.clausewitz_name, []) unless entries
         | 
| 60 73 | 
             
                  checks = entries.map do |key, entry|
         | 
| 61 74 | 
             
                    if ignored_keys.include?(key)
         | 
| 62 75 | 
             
                      IgnoredEntryResult.new(key)
         | 
| @@ -64,7 +77,7 @@ module Clausewitz; module Spelling | |
| 64 77 | 
             
                      check_entry(aspell_checker, wordlist, key, entry)
         | 
| 65 78 | 
             
                    end
         | 
| 66 79 | 
             
                  end
         | 
| 67 | 
            -
                  LangResults.new( | 
| 80 | 
            +
                  LangResults.new(lc.clausewitz_name, checks)
         | 
| 68 81 | 
             
                end
         | 
| 69 82 |  | 
| 70 83 | 
             
                def check_entry(checker, wordlist, key, entry)
         | 
| @@ -73,6 +86,7 @@ module Clausewitz; module Spelling | |
| 73 86 | 
             
                  # TODO: Look into supporting escaped square brackets as part of the
         | 
| 74 87 | 
             
                  #       string.
         | 
| 75 88 | 
             
                  entry.gsub!(/\[.+\]/, '')
         | 
| 89 | 
            +
                  entry.gsub!(/\$([A-Z]|\||\d|=)+\$/, '')
         | 
| 76 90 |  | 
| 77 91 | 
             
                  # Remove other localisation bits we don't care about.
         | 
| 78 92 | 
             
                  entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
         | 
| @@ -120,7 +134,8 @@ module Clausewitz; module Spelling | |
| 120 134 | 
             
                  return if is_ordinal?(word)
         | 
| 121 135 | 
             
                  return if is_percentage?(word)
         | 
| 122 136 | 
             
                  return if is_icon?(word)
         | 
| 123 | 
            -
                  return if  | 
| 137 | 
            +
                  return if is_initial?(word)
         | 
| 138 | 
            +
                  return if is_psalm?(word)
         | 
| 124 139 | 
             
                  return if wordlist.include?(word)
         | 
| 125 140 |  | 
| 126 141 | 
             
                  if !checker.correct?(word)
         | 
| @@ -128,6 +143,14 @@ module Clausewitz; module Spelling | |
| 128 143 | 
             
                  end
         | 
| 129 144 | 
             
                end
         | 
| 130 145 |  | 
| 146 | 
            +
                def is_psalm?(word)
         | 
| 147 | 
            +
                  word =~ /^\d+:\d+$/
         | 
| 148 | 
            +
                end
         | 
| 149 | 
            +
             | 
| 150 | 
            +
                def is_initial?(word)
         | 
| 151 | 
            +
                  word =~ /^[A-Z]\.$/
         | 
| 152 | 
            +
                end
         | 
| 153 | 
            +
             | 
| 131 154 | 
             
                def suggest_words(checker, wordlist, word)
         | 
| 132 155 | 
             
                  return [] if word.size < 3
         | 
| 133 156 |  | 
| @@ -151,10 +174,6 @@ module Clausewitz; module Spelling | |
| 151 174 | 
             
                  word =~ /^£\w+/
         | 
| 152 175 | 
             
                end
         | 
| 153 176 |  | 
| 154 | 
            -
                def is_define?(word)
         | 
| 155 | 
            -
                  word =~ /^\$(\w|\|)+\$/
         | 
| 156 | 
            -
                end
         | 
| 157 | 
            -
             | 
| 158 177 | 
             
                def is_number?(word)
         | 
| 159 178 | 
             
                  Float(word) != nil rescue false
         | 
| 160 179 | 
             
                end
         | 
| @@ -171,15 +190,16 @@ module Clausewitz; module Spelling | |
| 171 190 | 
             
                # Loads our custom wordlist into a temporary Aspell dictionary.
         | 
| 172 191 | 
             
                # This way Aspell won't yell at us for custom words and will also
         | 
| 173 192 | 
             
                # potentially select from this list as suggestions for misspelled words.
         | 
| 174 | 
            -
                def load_custom_dictionary( | 
| 175 | 
            -
                  dir = Dir.mktmpdir("custom-wordlist-#{ | 
| 176 | 
            -
                  output = File.join(dir, "#{ | 
| 193 | 
            +
                def load_custom_dictionary(lc)
         | 
| 194 | 
            +
                  dir = Dir.mktmpdir("custom-wordlist-#{lc.full_name}-")
         | 
| 195 | 
            +
                  output = File.join(dir, "#{lc.full_name}-custom.wlst")
         | 
| 177 196 | 
             
                  cmd = %W[
         | 
| 178 | 
            -
                    aspell --lang=#{ | 
| 197 | 
            +
                    aspell --lang=#{lc.base} --encoding=UTF-8 create master #{output}
         | 
| 179 198 | 
             
                  ]
         | 
| 180 199 | 
             
                  value = nil
         | 
| 181 200 | 
             
                  Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
         | 
| 182 | 
            -
             | 
| 201 | 
            +
                    dict_path = File.join(@dictionary_root, lc.full_name, 'dict.txt')
         | 
| 202 | 
            +
            				contents = File.read(dict_path)
         | 
| 183 203 | 
             
            				words = contents.lines.map(&:chomp)
         | 
| 184 204 | 
             
                    words.each do |word|
         | 
| 185 205 | 
             
                      stdin.puts(word)
         | 
| @@ -188,36 +208,44 @@ module Clausewitz; module Spelling | |
| 188 208 | 
             
                    value = wait_thr.value
         | 
| 189 209 | 
             
                  end
         | 
| 190 210 | 
             
            			unless value.success?
         | 
| 191 | 
            -
                  	fail("Could not generate custom word list for #{ | 
| 211 | 
            +
                  	fail("Could not generate custom word list for #{lc.full_name}!")
         | 
| 192 212 | 
             
            			end
         | 
| 193 213 | 
             
                  output
         | 
| 194 214 | 
             
                end
         | 
| 195 215 |  | 
| 196 | 
            -
                def load_aspell_checker( | 
| 197 | 
            -
                   | 
| 198 | 
            -
             | 
| 199 | 
            -
             | 
| 200 | 
            -
                   | 
| 201 | 
            -
             | 
| 202 | 
            -
                   | 
| 216 | 
            +
                def load_aspell_checker(lc)
         | 
| 217 | 
            +
                  if @loaded_spellcheckers[lc.full_name]
         | 
| 218 | 
            +
                    return @loaded_spellcheckers[lc.full_name]
         | 
| 219 | 
            +
                  end
         | 
| 220 | 
            +
                  aspell_checker = FFI::Aspell::Speller.new(
         | 
| 221 | 
            +
                    lc.full_name, encoding: 'UTF-8'
         | 
| 222 | 
            +
                  )
         | 
| 203 223 | 
             
            			aspell_checker.set('ignore-accents', true)
         | 
| 204 | 
            -
                  if @dictionary_root && @dictionary_root.join( | 
| 205 | 
            -
                    custom_words = load_custom_dictionary( | 
| 224 | 
            +
                  if @dictionary_root && @dictionary_root.join(lc.full_name).exist?
         | 
| 225 | 
            +
                    custom_words = load_custom_dictionary(lc)
         | 
| 206 226 | 
             
            				aspell_checker.set('extra-dicts', custom_words)
         | 
| 207 227 | 
             
                  end
         | 
| 208 | 
            -
            			@loaded_spellcheckers[ | 
| 228 | 
            +
            			@loaded_spellcheckers[lc.full_name] = aspell_checker
         | 
| 229 | 
            +
                end
         | 
| 230 | 
            +
             | 
| 231 | 
            +
                def load_wordlist(lc)
         | 
| 232 | 
            +
                  return @loaded_wordlists[lc.full_name] if @loaded_wordlists[lc.full_name]
         | 
| 233 | 
            +
                  contents = ''
         | 
| 234 | 
            +
                  dict_path = @dictionary_root.join(lc.full_name, 'dict.txt')
         | 
| 235 | 
            +
                  if @dictionary_root && dict_path.exist?
         | 
| 236 | 
            +
            				contents = File.read(@dictionary_root.join(lc.full_name, 'dict.txt'))
         | 
| 237 | 
            +
                  end
         | 
| 238 | 
            +
                  words = contents.lines.to_a.map(&:chomp)
         | 
| 239 | 
            +
            			@loaded_wordlists[lc.full_name] = Set.new(words)
         | 
| 209 240 | 
             
                end
         | 
| 210 241 |  | 
| 211 | 
            -
                def  | 
| 212 | 
            -
                   | 
| 213 | 
            -
                  aspell_lang_config = Localisation::LANG_MAP. | 
| 214 | 
            -
                     | 
| 215 | 
            -
                  end.first
         | 
| 216 | 
            -
                  lang_code = aspell_lang_config.last[:base]
         | 
| 217 | 
            -
                  if @dictionary_root && @dictionary_root.join(lang_code).exist?
         | 
| 218 | 
            -
            				contents = File.read(@dictionary_root.join(lang_code, 'dict.txt'))
         | 
| 242 | 
            +
                def language_config(language_name)
         | 
| 243 | 
            +
                  language_name = "l_#{language_name}" if language_name !~ /^l_/
         | 
| 244 | 
            +
                  aspell_lang_config = Localisation::LANG_MAP.find do |config_key, _|
         | 
| 245 | 
            +
                    language_name == config_key
         | 
| 219 246 | 
             
                  end
         | 
| 220 | 
            -
             | 
| 247 | 
            +
                  fail("Unknown language '#{language_name}'!") unless aspell_lang_config
         | 
| 248 | 
            +
                  aspell_lang_config.last
         | 
| 221 249 | 
             
                end
         | 
| 222 250 |  | 
| 223 251 | 
             
                # Make sure a file to be checked is actually present and readable.
         |