clausewitz-spelling 0.1.19 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: 55b8f238c7ccf5a99e18aea147d865baa3eba23e
4
- data.tar.gz: 737b15a9ea8c6dfb3b1279d1170986aa9b080e8f
2
+ SHA256:
3
+ metadata.gz: c3f825db29cf16aa71180113fdba663b417a44cab29a2cd9d4c2a447c3061ae3
4
+ data.tar.gz: ae39cd480cffe2f0a3ff6cb673c00c53d0d86def3136b274806dc9cacadff860
5
5
  SHA512:
6
- metadata.gz: 523a1c3c5cbed97de4454eca5422a181846544338e7fc108457f924f2a9a3ba9f3b1b1329679e5358f958b12b10d94972bc6a05767fc5feacf7b9bb804444ce5
7
- data.tar.gz: 3206f38862471223cc8fd4096cb398b1b42864b15ef8a60bb82de1e507f4bf497bfb86e7e46fbbe57d8eb643141f18d4e42c2f22940314e36c3e6b82feaa4cf6
6
+ metadata.gz: e1880d26258c57c61d8ba8a1b9b3177e998657cef265411a09c9f5fb95616e370386e3286112e468af581ea8cda235cd4483e95e8c525303598d9377dff363e0
7
+ data.tar.gz: afcd28a0b5f6250b5d4d2f6ec90b443feda4017be1543fb2466f485b261c1b256cd2b6875c26fac95e23d5c571e4a000b95d2738501b78be78c88ee2649ea301
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- clausewitz-spelling (0.1.19)
4
+ clausewitz-spelling (0.2.0)
5
5
  colorize
6
6
  damerau-levenshtein
7
- ffi-aspell
7
+ ffi-hunspell-wtchappell
8
8
  optimist
9
9
  pragmatic_tokenizer
10
10
 
@@ -16,8 +16,8 @@ GEM
16
16
  damerau-levenshtein (1.3.1)
17
17
  diff-lcs (1.3)
18
18
  ffi (1.10.0)
19
- ffi-aspell (1.1.0)
20
- ffi
19
+ ffi-hunspell-wtchappell (0.4.0)
20
+ ffi (~> 1.0)
21
21
  method_source (0.9.2)
22
22
  optimist (3.0.0)
23
23
  pragmatic_tokenizer (3.0.7)
@@ -45,11 +45,11 @@ PLATFORMS
45
45
  ruby
46
46
 
47
47
  DEPENDENCIES
48
- bundler (~> 1.16)
48
+ bundler (~> 1.17.2)
49
49
  clausewitz-spelling!
50
50
  pry
51
51
  rake (~> 10.0)
52
52
  rspec (~> 3.0)
53
53
 
54
54
  BUNDLED WITH
55
- 1.16.1
55
+ 1.17.2
@@ -20,12 +20,12 @@ Gem::Specification.new do |spec|
20
20
  spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
21
21
  spec.require_paths = ["lib"]
22
22
 
23
- spec.add_development_dependency "bundler", "~> 1.16"
23
+ spec.add_development_dependency "bundler", "~> 1.17.2"
24
24
  spec.add_development_dependency "rake", "~> 10.0"
25
25
  spec.add_development_dependency "rspec", "~> 3.0"
26
26
  spec.add_development_dependency "pry"
27
27
 
28
- spec.add_dependency "ffi-aspell"
28
+ spec.add_dependency "ffi-hunspell-wtchappell"
29
29
  spec.add_dependency "optimist"
30
30
  spec.add_dependency "colorize"
31
31
  spec.add_dependency "damerau-levenshtein"
data/en_GB_owb.dic ADDED
@@ -0,0 +1,81 @@
1
+ 35
2
+ Adytum/1
3
+ Almirante/1
4
+ APC/2 1
5
+ Allgood/1
6
+ Aradesh/1
7
+ Bearport/1
8
+ Boneyard/1
9
+ Chico/1
10
+ Ciudad/1
11
+ Colibrí/1
12
+ Darkwater/1
13
+ Dayglow/1
14
+ Elijah/1
15
+ Elijah's
16
+ FEV/1
17
+ GAI/1
18
+ Gizmo/1
19
+ Harbor/1
20
+ Hardin/2 1
21
+ Hardin's
22
+ Harlon/1
23
+ Héctor/1
24
+ Henderton/1
25
+ Hubology
26
+ Hubologist/2 1
27
+ Junktown/1
28
+ Klamath/1
29
+ Los
30
+ Maxson/1
31
+ Morbid/1
32
+ Mossman/1
33
+ NCR/1
34
+ Paz/1
35
+ Pesca/1
36
+ Peterson/1
37
+ Petro/1
38
+ Rafael/1
39
+ Rattletail/1
40
+ Reynosa/1
41
+ RobCo/1
42
+ Santángel/1
43
+ Seabear/1
44
+ Shi/1
45
+ Silus/1
46
+ Tamaulipas/1
47
+ Tampico/1
48
+ Tandi/1
49
+ Tech
50
+ Tlaloc/1
51
+ TODO/1
52
+ West-Tek/1
53
+ Valles/1
54
+ Vandenberg/1
55
+ Vault-Tec/1
56
+ bandito/2 1
57
+ brahmin/5 1
58
+ caudillo/2 1
59
+ commonfolk/5 1
60
+ deathclaw/2 1
61
+ doodad/2 1
62
+ eyebot/2
63
+ firefight/2 1
64
+ firepower/5 1
65
+ hardline/2 1
66
+ lakebed/2 1
67
+ malcontents
68
+ military's
69
+ playbook/2 1
70
+ protectron/2 1
71
+ radscorpion/2 1
72
+ robobrain/2 1
73
+ securitron/2 1
74
+ shopfront/2 1
75
+ superweapon/2 1
76
+ tribals/5 1
77
+ underway
78
+ unpowered
79
+ vertibird/2 1
80
+ weaponsmith/2 1
81
+ wildcard/2 1
@@ -9,8 +9,11 @@ class Main
9
9
 
10
10
  def parse_args(args)
11
11
  opts = Optimist::options(args) do
12
- opt :dictionary_root,
13
- "Directory containing per-language custom word lists",
12
+ opt :custom_dicts,
13
+ "List of custom dictionaries to load",
14
+ type: :strings
15
+ opt :custom_dict_root,
16
+ "Directory containing per-language-dialect custom dicts",
14
17
  type: :string
15
18
  opt :suggestion_count,
16
19
  "How many suggestions to display",
@@ -21,7 +24,23 @@ class Main
21
24
  type: :string
22
25
  end
23
26
  end
24
- [opts, args]
27
+
28
+ dialect_map = {}
29
+ dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
30
+ dialect_opts.each do |dialect_opt_key|
31
+ next unless opts[dialect_opt_key]
32
+ next if dialect_opt_key.to_s.end_with?('given')
33
+ language_name = dialect_opt_key[/(.+)_dialect/, 1]
34
+ dialect_map[language_name] = opts[dialect_opt_key]
35
+ end
36
+
37
+ checker_opts = {}
38
+ checker_opts[:custom_dict_root] = opts[:custom_dict_root]
39
+ checker_opts[:custom_dicts] = opts[:custom_dicts]
40
+ checker_opts[:dialect_map] = dialect_map
41
+ checker_opts[:suggestion_count] = opts[:suggestion_count]
42
+
43
+ [checker_opts, args]
25
44
  end
26
45
 
27
46
  def run
@@ -17,7 +17,7 @@ module Clausewitz
17
17
  end
18
18
 
19
19
  def select_dialect(dialect)
20
- if @dialects.include?(dialect)
20
+ if @dialects.include?(dialect.downcase)
21
21
  @selected_dialect = dialect
22
22
  else
23
23
  fail("Unknown dialect override '#{dialect}'!")
@@ -48,11 +48,11 @@ module Clausewitz
48
48
  ),
49
49
  'l_spanish' => LangConfig.new(
50
50
  'spanish',
51
- 'es', []
51
+ 'es', %w[mx es], 'es'
52
52
  ),
53
53
  'l_russian' => LangConfig.new(
54
54
  'russian',
55
- 'ru', []
55
+ 'ru', %w[ru], 'ru'
56
56
  )
57
57
  }
58
58
 
@@ -1,4 +1,4 @@
1
- require 'ffi/aspell'
1
+ require 'ffi/hunspell'
2
2
  require 'open3'
3
3
  require 'pathname'
4
4
  require 'set'
@@ -11,27 +11,39 @@ require 'clausewitz/spelling/results'
11
11
 
12
12
  module Clausewitz; module Spelling
13
13
  class Checker
14
- attr_accessor :dict_words
14
+ DEFAULT_SUGGESTION_COUNT = 3
15
+
15
16
  def initialize(opts = {})
16
- @dictionary_root = opts[:dictionary_root]
17
- @suggestion_count = opts[:suggestion_count] || 3
17
+ @custom_dict_root = opts[:custom_dict_root]
18
+ @custom_dict_root = Pathname.new(@custom_dict_root) if @custom_dict_root
19
+ @custom_dicts = opts[:custom_dicts] || []
20
+ @dialect_map = opts[:dialect_map] || {}
21
+ @suggestion_count = opts[:suggestion_count] || DEFAULT_SUGGESTION_COUNT
18
22
 
19
- if @dictionary_root
20
- @dictionary_root = Pathname.new(@dictionary_root)
21
- end
23
+ load_dictionaries!
24
+ end
22
25
 
23
- dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
24
- dialect_opts.each do |dialect_opt_key|
25
- next unless opts[dialect_opt_key]
26
- next if dialect_opt_key.to_s.end_with?('given')
27
- language_name = dialect_opt_key[/(.+)_dialect/, 1]
28
- config = language_config(language_name)
29
- dialect = opts[dialect_opt_key]
30
- config.select_dialect(dialect.downcase)
31
- end
26
+ def load_dictionaries!
27
+ @loaded_dicts = {}
28
+ Localisation::LANG_MAP.each do |_, config|
29
+ if @dialect_map.key?(config.name)
30
+ config.select_dialect(@dialect_map[config.name])
31
+ end
32
+
33
+ dict = FFI::Hunspell.dict(config.full_name)
34
+
35
+ @custom_dicts.each do |custom_dict|
36
+ path = @custom_dict_root.join("#{config.full_name}_#{custom_dict}")
37
+ path = Pathname.new("#{path}.dic")
38
+ if path.exist?
39
+ dict.add_dic(path.to_s)
40
+ else
41
+ $stderr.puts("Could not load dictionary '#{path}', skipping...")
42
+ end
43
+ end
32
44
 
33
- @loaded_spellcheckers = {}
34
- @loaded_wordlists = {}
45
+ @loaded_dicts[config.name] = dict
46
+ end
35
47
  end
36
48
 
37
49
  def check_file(filepath)
@@ -43,7 +55,7 @@ module Clausewitz; module Spelling
43
55
  return InvalidFilepathResult.new(filepath, e)
44
56
  end
45
57
 
46
- $stderr.puts "Skipping #{filepath}..." if filepath.directory?
58
+ $stderr.puts "Skipping directory '#{filepath}'..." if filepath.directory?
47
59
 
48
60
  begin
49
61
  contents = Clausewitz::Localisation.parse_file(filepath)
@@ -53,16 +65,14 @@ module Clausewitz; module Spelling
53
65
 
54
66
  checks = contents.map do |lang_name, entries|
55
67
  lc = language_config(lang_name)
56
- check_entries(lc, entries)
68
+ check_entries(entries, lc)
57
69
  end
58
70
  FileResults.new(filepath, checks)
59
71
  end
60
72
 
61
73
  private
62
74
 
63
- def check_entries(lc, entries)
64
- wordlist = load_wordlist(lc)
65
- aspell_checker = load_aspell_checker(lc)
75
+ def check_entries(entries, lc)
66
76
  spellcheck_ignore = entries&.delete('spellcheck_ignore')
67
77
  ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
68
78
  ignored_keys << 'spellcheck_ignore'
@@ -74,13 +84,13 @@ module Clausewitz; module Spelling
74
84
  if ignored_keys.include?(key)
75
85
  IgnoredEntryResult.new(key)
76
86
  else
77
- check_entry(lc, aspell_checker, wordlist, key, entry)
87
+ check_entry(key, entry, lc)
78
88
  end
79
89
  end
80
90
  LangResults.new(lc.clausewitz_name, checks)
81
91
  end
82
92
 
83
- def check_entry(lc, checker, wordlist, key, entry)
93
+ def check_entry(key, entry, lc)
84
94
  # We don't want to pay attention to scripted localisation, so we'll strip
85
95
  # it out before we start.
86
96
  # TODO: Look into supporting escaped square brackets as part of the
@@ -123,13 +133,19 @@ module Clausewitz; module Spelling
123
133
  }
124
134
  words = PragmaticTokenizer::Tokenizer.new(opts).tokenize(entry)
125
135
  words = words.map { |word| word.split('—') }.flatten(1)
136
+ words.map! do |word|
137
+ if word =~ /[[:alpha:]]\.$/ && word.chars.count('.') == 1
138
+ word.sub(/\.$/, '')
139
+ else
140
+ word
141
+ end
142
+ end
126
143
 
127
-
128
- checks = words.map { |word| check_word(checker, wordlist, word) }.compact
144
+ checks = words.map { |word| check_word(word, lc) }.compact
129
145
  EntryResults.new(key, checks)
130
146
  end
131
147
 
132
- def check_word(checker, wordlist, word)
148
+ def check_word(word, lc)
133
149
  return if is_number?(word)
134
150
  return if is_plural_number?(word)
135
151
  return if is_ordinal?(word)
@@ -137,10 +153,11 @@ module Clausewitz; module Spelling
137
153
  return if is_icon?(word)
138
154
  return if is_initial?(word)
139
155
  return if is_psalm?(word)
140
- return if wordlist.include?(word)
141
156
 
142
- if !checker.correct?(word)
143
- MisspelledWordResult.new(word, suggest_words(checker, wordlist, word))
157
+ lang_dict = @loaded_dicts[lc.name]
158
+ if !lang_dict.check?(word)
159
+ suggestions = lang_dict.suggest(word).take(@suggestion_count)
160
+ MisspelledWordResult.new(word, suggestions)
144
161
  end
145
162
  end
146
163
 
@@ -156,26 +173,6 @@ module Clausewitz; module Spelling
156
173
  word =~ /^[A-Z]\.$/
157
174
  end
158
175
 
159
- def suggest_words(checker, wordlist, word)
160
- return [] if word.size < 3
161
-
162
- suggestions = Set.new
163
-
164
- aspell_suggestions = checker.suggestions(word)
165
-
166
- custom_suggestions = wordlist.select do |dict_word|
167
- min = [word.size, dict_word.size].min
168
- DamerauLevenshtein.distance(word, dict_word) < min
169
- end
170
-
171
- aspell_suggestions.each { |sug| suggestions.add(sug) }
172
- custom_suggestions.each { |sug| suggestions.add(sug) }
173
-
174
- suggestions.to_a.sort_by do |sug|
175
- DamerauLevenshtein.distance(sug, word)
176
- end.first(@suggestion_count)
177
- end
178
-
179
176
  def is_icon?(word)
180
177
  word =~ /^£\w+/
181
178
  end
@@ -193,71 +190,19 @@ module Clausewitz; module Spelling
193
190
  word =~ /%(-|\+)?[0-9]+(\.[0-9]+)?/
194
191
  end
195
192
 
196
- # Loads our custom wordlist into a temporary Aspell dictionary.
197
- # This way Aspell won't yell at us for custom words and will also
198
- # potentially select from this list as suggestions for misspelled words.
199
- def load_custom_dictionary(lc)
200
- dir = Dir.mktmpdir("custom-wordlist-#{lc.full_name}-")
201
- output = File.join(dir, "#{lc.full_name}-custom.wlst")
202
- cmd = %W[
203
- aspell --lang=#{lc.base} --encoding=UTF-8 create master #{output}
204
- ]
205
- value = nil
206
- Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
207
- dict_path = File.join(@dictionary_root, lc.full_name, 'dict.txt')
208
- contents = File.read(dict_path)
209
- words = contents.lines.map(&:chomp)
210
- words.each do |word|
211
- stdin.puts(word)
212
- end
213
- stdin.close
214
- value = wait_thr.value
215
- end
216
- unless value.success?
217
- fail("Could not generate custom word list for #{lc.full_name}!")
218
- end
219
- output
220
- end
221
-
222
- def load_aspell_checker(lc)
223
- if @loaded_spellcheckers[lc.full_name]
224
- return @loaded_spellcheckers[lc.full_name]
225
- end
226
- aspell_checker = FFI::Aspell::Speller.new(
227
- lc.full_name, encoding: 'UTF-8'
228
- )
229
- aspell_checker.set('ignore-accents', true)
230
- if @dictionary_root && @dictionary_root.join(lc.full_name).exist?
231
- custom_words = load_custom_dictionary(lc)
232
- aspell_checker.set('extra-dicts', custom_words)
233
- end
234
- @loaded_spellcheckers[lc.full_name] = aspell_checker
235
- end
236
-
237
- def load_wordlist(lc)
238
- return @loaded_wordlists[lc.full_name] if @loaded_wordlists[lc.full_name]
239
- contents = ''
240
- dict_path = @dictionary_root.join(lc.full_name, 'dict.txt')
241
- if @dictionary_root && dict_path.exist?
242
- contents = File.read(@dictionary_root.join(lc.full_name, 'dict.txt'))
243
- end
244
- words = contents.lines.to_a.map(&:chomp)
245
- @loaded_wordlists[lc.full_name] = Set.new(words)
246
- end
247
-
248
193
  def language_config(language_name)
249
194
  language_name = "l_#{language_name}" if language_name !~ /^l_/
250
- aspell_lang_config = Localisation::LANG_MAP.find do |config_key, _|
195
+ lang_config = Localisation::LANG_MAP.find do |config_key, _|
251
196
  language_name == config_key
252
197
  end
253
- fail("Unknown language '#{language_name}'!") unless aspell_lang_config
254
- aspell_lang_config.last
198
+ fail("Unknown language '#{language_name}'!") unless lang_config
199
+ lang_config.last
255
200
  end
256
201
 
257
202
  # Make sure a file to be checked is actually present and readable.
258
203
  def validate_filepath!(filepath)
259
- fail("No such file #{filepath}!") unless filepath.exist?
260
- fail("Cannot read #{filepath}!") unless filepath.readable?
204
+ fail("No such file '#{filepath}'!") unless filepath.exist?
205
+ fail("Cannot read '#{filepath}'!") unless filepath.readable?
261
206
  end
262
207
  end
263
208
  end; end
@@ -1,5 +1,5 @@
1
1
  module Clausewitz
2
2
  module Spelling
3
- VERSION = "0.1.19"
3
+ VERSION = "0.2.0"
4
4
  end
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: clausewitz-spelling
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.19
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Will Chappell
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2019-01-15 00:00:00.000000000 Z
11
+ date: 2019-06-16 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,14 +16,14 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.16'
19
+ version: 1.17.2
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.16'
26
+ version: 1.17.2
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
@@ -67,7 +67,7 @@ dependencies:
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
69
  - !ruby/object:Gem::Dependency
70
- name: ffi-aspell
70
+ name: ffi-hunspell-wtchappell
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
73
  - - ">="
@@ -157,6 +157,7 @@ files:
157
157
  - bin/console
158
158
  - bin/setup
159
159
  - clausewitz-spelling.gemspec
160
+ - en_GB_owb.dic
160
161
  - exe/clausewitz-spellcheck
161
162
  - lib/clausewitz/localisation.rb
162
163
  - lib/clausewitz/spelling.rb
@@ -182,8 +183,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
182
183
  - !ruby/object:Gem::Version
183
184
  version: '0'
184
185
  requirements: []
185
- rubyforge_project:
186
- rubygems_version: 2.5.2.1
186
+ rubygems_version: 3.0.1
187
187
  signing_key:
188
188
  specification_version: 4
189
189
  summary: Spellchecker tool for Clausewitz engine files