clausewitz-spelling 0.1.19 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 55b8f238c7ccf5a99e18aea147d865baa3eba23e
4
- data.tar.gz: 737b15a9ea8c6dfb3b1279d1170986aa9b080e8f
2
+ SHA256:
3
+ metadata.gz: c3f825db29cf16aa71180113fdba663b417a44cab29a2cd9d4c2a447c3061ae3
4
+ data.tar.gz: ae39cd480cffe2f0a3ff6cb673c00c53d0d86def3136b274806dc9cacadff860
5
5
  SHA512:
6
- metadata.gz: 523a1c3c5cbed97de4454eca5422a181846544338e7fc108457f924f2a9a3ba9f3b1b1329679e5358f958b12b10d94972bc6a05767fc5feacf7b9bb804444ce5
7
- data.tar.gz: 3206f38862471223cc8fd4096cb398b1b42864b15ef8a60bb82de1e507f4bf497bfb86e7e46fbbe57d8eb643141f18d4e42c2f22940314e36c3e6b82feaa4cf6
6
+ metadata.gz: e1880d26258c57c61d8ba8a1b9b3177e998657cef265411a09c9f5fb95616e370386e3286112e468af581ea8cda235cd4483e95e8c525303598d9377dff363e0
7
+ data.tar.gz: afcd28a0b5f6250b5d4d2f6ec90b443feda4017be1543fb2466f485b261c1b256cd2b6875c26fac95e23d5c571e4a000b95d2738501b78be78c88ee2649ea301
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clausewitz-spelling (0.1.19)
4
+ clausewitz-spelling (0.2.0)
5
5
  colorize
6
6
  damerau-levenshtein
7
- ffi-aspell
7
+ ffi-hunspell-wtchappell
8
8
  optimist
9
9
  pragmatic_tokenizer
10
10
 
@@ -16,8 +16,8 @@ GEM
16
16
  damerau-levenshtein (1.3.1)
17
17
  diff-lcs (1.3)
18
18
  ffi (1.10.0)
19
- ffi-aspell (1.1.0)
20
- ffi
19
+ ffi-hunspell-wtchappell (0.4.0)
20
+ ffi (~> 1.0)
21
21
  method_source (0.9.2)
22
22
  optimist (3.0.0)
23
23
  pragmatic_tokenizer (3.0.7)
@@ -45,11 +45,11 @@ PLATFORMS
45
45
  ruby
46
46
 
47
47
  DEPENDENCIES
48
- bundler (~> 1.16)
48
+ bundler (~> 1.17.2)
49
49
  clausewitz-spelling!
50
50
  pry
51
51
  rake (~> 10.0)
52
52
  rspec (~> 3.0)
53
53
 
54
54
  BUNDLED WITH
55
- 1.16.1
55
+ 1.17.2
@@ -20,12 +20,12 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.16"
23
+ spec.add_development_dependency "bundler", "~> 1.17.2"
24
24
  spec.add_development_dependency "rake", "~> 10.0"
25
25
  spec.add_development_dependency "rspec", "~> 3.0"
26
26
  spec.add_development_dependency "pry"
27
27
 
28
- spec.add_dependency "ffi-aspell"
28
+ spec.add_dependency "ffi-hunspell-wtchappell"
29
29
  spec.add_dependency "optimist"
30
30
  spec.add_dependency "colorize"
31
31
  spec.add_dependency "damerau-levenshtein"
data/en_GB_owb.dic ADDED
@@ -0,0 +1,81 @@
1
+ 35
2
+ Adytum/1
3
+ Almirante/1
4
+ APC/2 1
5
+ Allgood/1
6
+ Aradesh/1
7
+ Bearport/1
8
+ Boneyard/1
9
+ Chico/1
10
+ Ciudad/1
11
+ Colibrí/1
12
+ Darkwater/1
13
+ Dayglow/1
14
+ Elijah/1
15
+ Elijah's
16
+ FEV/1
17
+ GAI/1
18
+ Gizmo/1
19
+ Harbor/1
20
+ Hardin/2 1
21
+ Hardin's
22
+ Harlon/1
23
+ Héctor/1
24
+ Henderton/1
25
+ Hubology
26
+ Hubologist/2 1
27
+ Junktown/1
28
+ Klamath/1
29
+ Los
30
+ Maxson/1
31
+ Morbid/1
32
+ Mossman/1
33
+ NCR/1
34
+ Paz/1
35
+ Pesca/1
36
+ Peterson/1
37
+ Petro/1
38
+ Rafael/1
39
+ Rattletail/1
40
+ Reynosa/1
41
+ RobCo/1
42
+ Santángel/1
43
+ Seabear/1
44
+ Shi/1
45
+ Silus/1
46
+ Tamaulipas/1
47
+ Tampico/1
48
+ Tandi/1
49
+ Tech
50
+ Tlaloc/1
51
+ TODO/1
52
+ West-Tek/1
53
+ Valles/1
54
+ Vandenberg/1
55
+ Vault-Tec/1
56
+ bandito/2 1
57
+ brahmin/5 1
58
+ caudillo/2 1
59
+ commonfolk/5 1
60
+ deathclaw/2 1
61
+ doodad/2 1
62
+ eyebot/2
63
+ firefight/2 1
64
+ firepower/5 1
65
+ hardline/2 1
66
+ lakebed/2 1
67
+ malcontents
68
+ military's
69
+ playbook/2 1
70
+ protectron/2 1
71
+ radscorpion/2 1
72
+ robobrain/2 1
73
+ securitron/2 1
74
+ shopfront/2 1
75
+ superweapon/2 1
76
+ tribals/5 1
77
+ underway
78
+ unpowered
79
+ vertibird/2 1
80
+ weaponsmith/2 1
81
+ wildcard/2 1
@@ -9,8 +9,11 @@ class Main
9
9
 
10
10
  def parse_args(args)
11
11
  opts = Optimist::options(args) do
12
- opt :dictionary_root,
13
- "Directory containing per-language custom word lists",
12
+ opt :custom_dicts,
13
+ "List of custom dictionaries to load",
14
+ type: :strings
15
+ opt :custom_dict_root,
16
+ "Directory containing per-language-dialect custom dicts",
14
17
  type: :string
15
18
  opt :suggestion_count,
16
19
  "How many suggestions to display",
@@ -21,7 +24,23 @@ class Main
21
24
  type: :string
22
25
  end
23
26
  end
24
- [opts, args]
27
+
28
+ dialect_map = {}
29
+ dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
30
+ dialect_opts.each do |dialect_opt_key|
31
+ next unless opts[dialect_opt_key]
32
+ next if dialect_opt_key.to_s.end_with?('given')
33
+ language_name = dialect_opt_key[/(.+)_dialect/, 1]
34
+ dialect_map[language_name] = opts[dialect_opt_key]
35
+ end
36
+
37
+ checker_opts = {}
38
+ checker_opts[:custom_dict_root] = opts[:custom_dict_root]
39
+ checker_opts[:custom_dicts] = opts[:custom_dicts]
40
+ checker_opts[:dialect_map] = dialect_map
41
+ checker_opts[:suggestion_count] = opts[:suggestion_count]
42
+
43
+ [checker_opts, args]
25
44
  end
26
45
 
27
46
  def run
@@ -17,7 +17,7 @@ module Clausewitz
17
17
  end
18
18
 
19
19
  def select_dialect(dialect)
20
- if @dialects.include?(dialect)
20
+ if @dialects.include?(dialect.downcase)
21
21
  @selected_dialect = dialect
22
22
  else
23
23
  fail("Unknown dialect override '#{dialect}'!")
@@ -48,11 +48,11 @@ module Clausewitz
48
48
  ),
49
49
  'l_spanish' => LangConfig.new(
50
50
  'spanish',
51
- 'es', []
51
+ 'es', %w[mx es], 'es'
52
52
  ),
53
53
  'l_russian' => LangConfig.new(
54
54
  'russian',
55
- 'ru', []
55
+ 'ru', %w[ru], 'ru'
56
56
  )
57
57
  }
58
58
 
@@ -1,4 +1,4 @@
1
- require 'ffi/aspell'
1
+ require 'ffi/hunspell'
2
2
  require 'open3'
3
3
  require 'pathname'
4
4
  require 'set'
@@ -11,27 +11,39 @@ require 'clausewitz/spelling/results'
11
11
 
12
12
  module Clausewitz; module Spelling
13
13
  class Checker
14
- attr_accessor :dict_words
14
+ DEFAULT_SUGGESTION_COUNT = 3
15
+
15
16
  def initialize(opts = {})
16
- @dictionary_root = opts[:dictionary_root]
17
- @suggestion_count = opts[:suggestion_count] || 3
17
+ @custom_dict_root = opts[:custom_dict_root]
18
+ @custom_dict_root = Pathname.new(@custom_dict_root) if @custom_dict_root
19
+ @custom_dicts = opts[:custom_dicts] || []
20
+ @dialect_map = opts[:dialect_map] || {}
21
+ @suggestion_count = opts[:suggestion_count] || DEFAULT_SUGGESTION_COUNT
18
22
 
19
- if @dictionary_root
20
- @dictionary_root = Pathname.new(@dictionary_root)
21
- end
23
+ load_dictionaries!
24
+ end
22
25
 
23
- dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
24
- dialect_opts.each do |dialect_opt_key|
25
- next unless opts[dialect_opt_key]
26
- next if dialect_opt_key.to_s.end_with?('given')
27
- language_name = dialect_opt_key[/(.+)_dialect/, 1]
28
- config = language_config(language_name)
29
- dialect = opts[dialect_opt_key]
30
- config.select_dialect(dialect.downcase)
31
- end
26
+ def load_dictionaries!
27
+ @loaded_dicts = {}
28
+ Localisation::LANG_MAP.each do |_, config|
29
+ if @dialect_map.key?(config.name)
30
+ config.select_dialect(@dialect_map[config.name])
31
+ end
32
+
33
+ dict = FFI::Hunspell.dict(config.full_name)
34
+
35
+ @custom_dicts.each do |custom_dict|
36
+ path = @custom_dict_root.join("#{config.full_name}_#{custom_dict}")
37
+ path = Pathname.new("#{path}.dic")
38
+ if path.exist?
39
+ dict.add_dic(path.to_s)
40
+ else
41
+ $stderr.puts("Could not load dictionary '#{path}', skipping...")
42
+ end
43
+ end
32
44
 
33
- @loaded_spellcheckers = {}
34
- @loaded_wordlists = {}
45
+ @loaded_dicts[config.name] = dict
46
+ end
35
47
  end
36
48
 
37
49
  def check_file(filepath)
@@ -43,7 +55,7 @@ module Clausewitz; module Spelling
43
55
  return InvalidFilepathResult.new(filepath, e)
44
56
  end
45
57
 
46
- $stderr.puts "Skipping #{filepath}..." if filepath.directory?
58
+ $stderr.puts "Skipping directory '#{filepath}'..." if filepath.directory?
47
59
 
48
60
  begin
49
61
  contents = Clausewitz::Localisation.parse_file(filepath)
@@ -53,16 +65,14 @@ module Clausewitz; module Spelling
53
65
 
54
66
  checks = contents.map do |lang_name, entries|
55
67
  lc = language_config(lang_name)
56
- check_entries(lc, entries)
68
+ check_entries(entries, lc)
57
69
  end
58
70
  FileResults.new(filepath, checks)
59
71
  end
60
72
 
61
73
  private
62
74
 
63
- def check_entries(lc, entries)
64
- wordlist = load_wordlist(lc)
65
- aspell_checker = load_aspell_checker(lc)
75
+ def check_entries(entries, lc)
66
76
  spellcheck_ignore = entries&.delete('spellcheck_ignore')
67
77
  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
68
78
  ignored_keys << 'spellcheck_ignore'
@@ -74,13 +84,13 @@ module Clausewitz; module Spelling
74
84
  if ignored_keys.include?(key)
75
85
  IgnoredEntryResult.new(key)
76
86
  else
77
- check_entry(lc, aspell_checker, wordlist, key, entry)
87
+ check_entry(key, entry, lc)
78
88
  end
79
89
  end
80
90
  LangResults.new(lc.clausewitz_name, checks)
81
91
  end
82
92
 
83
- def check_entry(lc, checker, wordlist, key, entry)
93
+ def check_entry(key, entry, lc)
84
94
  # We don't want to pay attention to scripted localisation, so we'll strip
85
95
  # it out before we start.
86
96
  # TODO: Look into supporting escaped square brackets as part of the
@@ -123,13 +133,19 @@ module Clausewitz; module Spelling
123
133
  }
124
134
  words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
125
135
  words = words.map { |word| word.split('—') }.flatten(1)
136
+ words.map! do |word|
137
+ if word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
138
+ word.sub(/\.$/, '')
139
+ else
140
+ word
141
+ end
142
+ end
126
143
 
127
-
128
- checks = words.map { |word| check_word(checker, wordlist, word) }.compact
144
+ checks = words.map { |word| check_word(word, lc) }.compact
129
145
  EntryResults.new(key, checks)
130
146
  end
131
147
 
132
- def check_word(checker, wordlist, word)
148
+ def check_word(word, lc)
133
149
  return if is_number?(word)
134
150
  return if is_plural_number?(word)
135
151
  return if is_ordinal?(word)
@@ -137,10 +153,11 @@ module Clausewitz; module Spelling
137
153
  return if is_icon?(word)
138
154
  return if is_initial?(word)
139
155
  return if is_psalm?(word)
140
- return if wordlist.include?(word)
141
156
 
142
- if !checker.correct?(word)
143
- MisspelledWordResult.new(word, suggest_words(checker, wordlist, word))
157
+ lang_dict = @loaded_dicts[lc.name]
158
+ if !lang_dict.check?(word)
159
+ suggestions = lang_dict.suggest(word).take(@suggestion_count)
160
+ MisspelledWordResult.new(word, suggestions)
144
161
  end
145
162
  end
146
163
 
@@ -156,26 +173,6 @@ module Clausewitz; module Spelling
156
173
  word =~ /^[A-Z]\.$/
157
174
  end
158
175
 
159
- def suggest_words(checker, wordlist, word)
160
- return [] if word.size < 3
161
-
162
- suggestions = Set.new
163
-
164
- aspell_suggestions = checker.suggestions(word)
165
-
166
- custom_suggestions = wordlist.select do |dict_word|
167
- min = [word.size, dict_word.size].min
168
- DamerauLevenshtein.distance(word, dict_word) < min
169
- end
170
-
171
- aspell_suggestions.each { |sug| suggestions.add(sug) }
172
- custom_suggestions.each { |sug| suggestions.add(sug) }
173
-
174
- suggestions.to_a.sort_by do |sug|
175
- DamerauLevenshtein.distance(sug, word)
176
- end.first(@suggestion_count)
177
- end
178
-
179
176
  def is_icon?(word)
180
177
  word =~ /^£\w+/
181
178
  end
@@ -193,71 +190,19 @@ module Clausewitz; module Spelling
193
190
  word =~ /%(-|\+)?[0-9]+(\.[0-9]+)?/
194
191
  end
195
192
 
196
- # Loads our custom wordlist into a temporary Aspell dictionary.
197
- # This way Aspell won't yell at us for custom words and will also
198
- # potentially select from this list as suggestions for misspelled words.
199
- def load_custom_dictionary(lc)
200
- dir = Dir.mktmpdir("custom-wordlist-#{lc.full_name}-")
201
- output = File.join(dir, "#{lc.full_name}-custom.wlst")
202
- cmd = %W[
203
- aspell --lang=#{lc.base} --encoding=UTF-8 create master #{output}
204
- ]
205
- value = nil
206
- Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
207
- dict_path = File.join(@dictionary_root, lc.full_name, 'dict.txt')
208
- contents = File.read(dict_path)
209
- words = contents.lines.map(&:chomp)
210
- words.each do |word|
211
- stdin.puts(word)
212
- end
213
- stdin.close
214
- value = wait_thr.value
215
- end
216
- unless value.success?
217
- fail("Could not generate custom word list for #{lc.full_name}!")
218
- end
219
- output
220
- end
221
-
222
- def load_aspell_checker(lc)
223
- if @loaded_spellcheckers[lc.full_name]
224
- return @loaded_spellcheckers[lc.full_name]
225
- end
226
- aspell_checker = FFI::Aspell::Speller.new(
227
- lc.full_name, encoding: 'UTF-8'
228
- )
229
- aspell_checker.set('ignore-accents', true)
230
- if @dictionary_root && @dictionary_root.join(lc.full_name).exist?
231
- custom_words = load_custom_dictionary(lc)
232
- aspell_checker.set('extra-dicts', custom_words)
233
- end
234
- @loaded_spellcheckers[lc.full_name] = aspell_checker
235
- end
236
-
237
- def load_wordlist(lc)
238
- return @loaded_wordlists[lc.full_name] if @loaded_wordlists[lc.full_name]
239
- contents = ''
240
- dict_path = @dictionary_root.join(lc.full_name, 'dict.txt')
241
- if @dictionary_root && dict_path.exist?
242
- contents = File.read(@dictionary_root.join(lc.full_name, 'dict.txt'))
243
- end
244
- words = contents.lines.to_a.map(&:chomp)
245
- @loaded_wordlists[lc.full_name] = Set.new(words)
246
- end
247
-
248
193
  def language_config(language_name)
249
194
  language_name = "l_#{language_name}" if language_name !~ /^l_/
250
- aspell_lang_config = Localisation::LANG_MAP.find do |config_key, _|
195
+ lang_config = Localisation::LANG_MAP.find do |config_key, _|
251
196
  language_name == config_key
252
197
  end
253
- fail("Unknown language '#{language_name}'!") unless aspell_lang_config
254
- aspell_lang_config.last
198
+ fail("Unknown language '#{language_name}'!") unless lang_config
199
+ lang_config.last
255
200
  end
256
201
 
257
202
  # Make sure a file to be checked is actually present and readable.
258
203
  def validate_filepath!(filepath)
259
- fail("No such file #{filepath}!") unless filepath.exist?
260
- fail("Cannot read #{filepath}!") unless filepath.readable?
204
+ fail("No such file '#{filepath}'!") unless filepath.exist?
205
+ fail("Cannot read '#{filepath}'!") unless filepath.readable?
261
206
  end
262
207
  end
263
208
  end; end
@@ -1,5 +1,5 @@
1
1
  module Clausewitz
2
2
  module Spelling
3
- VERSION = "0.1.19"
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clausewitz-spelling
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.19
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Chappell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-15 00:00:00.000000000 Z
11
+ date: 2019-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: 1.17.2
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.16'
26
+ version: 1.17.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ffi-aspell
70
+ name: ffi-hunspell-wtchappell
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -157,6 +157,7 @@ files:
157
157
  - bin/console
158
158
  - bin/setup
159
159
  - clausewitz-spelling.gemspec
160
+ - en_GB_owb.dic
160
161
  - exe/clausewitz-spellcheck
161
162
  - lib/clausewitz/localisation.rb
162
163
  - lib/clausewitz/spelling.rb
@@ -182,8 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
183
  - !ruby/object:Gem::Version
183
184
  version: '0'
184
185
  requirements: []
185
- rubyforge_project:
186
- rubygems_version: 2.5.2.1
186
+ rubygems_version: 3.0.1
187
187
  signing_key:
188
188
  specification_version: 4
189
189
  summary: Spellchecker tool for Clausewitz engine files