clausewitz-spelling 0.1.17 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/clausewitz-spellcheck +5 -0
- data/lib/clausewitz/localisation.rb +52 -19
- data/lib/clausewitz/spelling/checker.rb +66 -38
- data/lib/clausewitz/spelling/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45dfa3f0e350c7b4b3ba9d1a6f69c970cb884134
|
4
|
+
data.tar.gz: c4ffb8579ab8787115d447b11fa49cb4a3159569
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4661cea315ffec778213bf3b67ea6322973d19ce5a6302a23b2f8afef4eb1231583df251b7b098274bfd7b32b9d3f40cf01bca6cc65efd87bc3bd8581cd73e69
|
7
|
+
data.tar.gz: 44c9608f76e094a80d7ffd4924641ab65ede46b46a10a99daad2b70a4f7c9d0934df1f6b243a84deb23a71a9e21b2734e6205012f42d8c173e5cda9883bc6f97
|
data/Gemfile.lock
CHANGED
data/exe/clausewitz-spellcheck
CHANGED
@@ -15,6 +15,11 @@ class Main
|
|
15
15
|
opt :suggestion_count,
|
16
16
|
"How many suggestions to display",
|
17
17
|
type: :int
|
18
|
+
Clausewitz::Localisation::LANG_MAP.each do |_, config|
|
19
|
+
opt "#{config.name}_dialect".to_sym,
|
20
|
+
"Select dialect for #{config.name.capitalize}",
|
21
|
+
type: :string
|
22
|
+
end
|
18
23
|
end
|
19
24
|
[opts, args]
|
20
25
|
end
|
@@ -2,25 +2,58 @@ require 'yaml'
|
|
2
2
|
|
3
3
|
module Clausewitz
|
4
4
|
module Localisation
|
5
|
+
# Describes one localisation language and how to name its dictionary.
#
# A config holds the plain language name (e.g. 'english'), a base language
# code (e.g. 'en'), the list of dialect codes that may be selected, and an
# optional default dialect that is in effect until another one is selected.
class LangConfig
  attr_reader :name, :base, :dialects, :default_dialect

  # @param name [String] language name without the 'l_' prefix
  # @param base [String] base language code; first component of #full_name
  # @param dialects [Array<String>] dialect codes accepted by #select_dialect
  # @param default_dialect [String, nil] dialect used until one is selected
  def initialize(name, base, dialects, default_dialect = nil)
    @name = name
    @base = base
    @dialects = dialects
    @default_dialect = default_dialect
    @selected_dialect = @default_dialect
  end

  # @return [String] the language name prefixed with 'l_', e.g. 'l_english'
  def clausewitz_name
    "l_#{@name}"
  end

  # Switches the active dialect.
  #
  # The comparison ignores case because #full_name upcases the dialect
  # anyway, so 'GB' and 'gb' denote the same dictionary. The configured
  # spelling of the dialect is what gets stored.
  #
  # @param dialect [String] one of the codes in #dialects (any letter case)
  # @return [String] the configured dialect code that is now selected
  # @raise [RuntimeError] when the dialect is not configured for this language
  def select_dialect(dialect)
    wanted = dialect.to_s
    match = @dialects.find { |known| known.to_s.casecmp(wanted) == 0 }
    fail("Unknown dialect override '#{dialect}'!") unless match
    @selected_dialect = match
  end

  # @return [String] "<base>_<DIALECT>" when a dialect is selected (for
  #   example 'en_GB'), otherwise just the base code
  def full_name
    @selected_dialect ? "#{@base}_#{@selected_dialect.upcase}" : @base
  end
end
|
31
|
+
|
5
32
|
# Known localisation languages, keyed by their Clausewitz key ('l_<name>').
# Each value is a LangConfig built from: language name, base language code,
# selectable dialects and (optionally) the default dialect. Spanish and
# Russian declare no dialects and no default.
#
# The hash itself is frozen so entries cannot be added or removed by
# accident; the LangConfig values stay mutable because a dialect can still
# be selected on them.
LANG_MAP = {
  'l_english' => LangConfig.new(
    'english',
    'en', %w[gb us ca], 'gb'
  ),
  'l_french' => LangConfig.new(
    'french',
    'fr', %w[fr ca], 'fr'
  ),
  'l_german' => LangConfig.new(
    'german',
    'de', %w[de], 'de'
  ),
  'l_portuguese' => LangConfig.new(
    'portuguese',
    'pt', %w[pt br], 'pt'
  ),
  'l_spanish' => LangConfig.new(
    'spanish',
    'es', []
  ),
  'l_russian' => LangConfig.new(
    'russian',
    'ru', []
  )
}.freeze
|
25
58
|
|
26
59
|
def self.parse(text)
|
@@ -56,7 +89,7 @@ module Clausewitz
|
|
56
89
|
end
|
57
90
|
end
|
58
91
|
|
59
|
-
VALID_LANG_REGEX =
|
92
|
+
# Pattern built from the LANG_MAP keys (e.g. 'l_english'), joined with '|'
# inside a single capture group. The pattern carries no anchors, so it
# matches any string that contains one of the keys anywhere in it.
VALID_LANG_REGEX = /(#{LANG_MAP.keys.join('|')})/
|
60
93
|
# Reports whether +lang+ matches VALID_LANG_REGEX.
#
# Returns a strict boolean rather than the match index / nil that `=~`
# produces, so the predicate's result does not leak an Integer.
#
# @param lang [String, nil] candidate language key, e.g. 'l_english'
# @return [Boolean] true when the pattern matches, false otherwise
def self.valid_lang?(lang)
  !(lang =~ VALID_LANG_REGEX).nil?
end
|
@@ -20,6 +20,16 @@ module Clausewitz; module Spelling
|
|
20
20
|
@dictionary_root = Pathname.new(@dictionary_root)
|
21
21
|
end
|
22
22
|
|
23
|
+
dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
|
24
|
+
dialect_opts.each do |dialect_opt_key|
|
25
|
+
next unless opts[dialect_opt_key]
|
26
|
+
next if dialect_opt_key.to_s.end_with?('given')
|
27
|
+
language_name = dialect_opt_key[/(.+)_dialect/, 1]
|
28
|
+
config = language_config(language_name)
|
29
|
+
dialect = opts[dialect_opt_key]
|
30
|
+
config.select_dialect(dialect.downcase)
|
31
|
+
end
|
32
|
+
|
23
33
|
@loaded_spellcheckers = {}
|
24
34
|
@loaded_wordlists = {}
|
25
35
|
end
|
@@ -41,22 +51,25 @@ module Clausewitz; module Spelling
|
|
41
51
|
return UnparseableFileResult.new(filepath, e)
|
42
52
|
end
|
43
53
|
|
44
|
-
checks = contents.map do |
|
45
|
-
|
54
|
+
checks = contents.map do |lang_name, entries|
|
55
|
+
lc = language_config(lang_name)
|
56
|
+
check_entries(lc, entries)
|
46
57
|
end
|
47
58
|
FileResults.new(filepath, checks)
|
48
59
|
end
|
49
60
|
|
50
61
|
private
|
51
62
|
|
52
|
-
def check_entries(
|
53
|
-
wordlist = load_wordlist(
|
54
|
-
aspell_checker = load_aspell_checker(
|
63
|
+
def check_entries(lc, entries)
|
64
|
+
wordlist = load_wordlist(lc)
|
65
|
+
aspell_checker = load_aspell_checker(lc)
|
55
66
|
spellcheck_ignore = entries&.delete('spellcheck_ignore')
|
56
67
|
ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
|
57
68
|
ignored_keys << 'spellcheck_ignore'
|
58
|
-
|
59
|
-
|
69
|
+
if ignored_keys.include?('all')
|
70
|
+
return IgnoredLangResult.new(lc.clausewitz_name)
|
71
|
+
end
|
72
|
+
return LangResults.new(lc.clausewitz_name, []) unless entries
|
60
73
|
checks = entries.map do |key, entry|
|
61
74
|
if ignored_keys.include?(key)
|
62
75
|
IgnoredEntryResult.new(key)
|
@@ -64,7 +77,7 @@ module Clausewitz; module Spelling
|
|
64
77
|
check_entry(aspell_checker, wordlist, key, entry)
|
65
78
|
end
|
66
79
|
end
|
67
|
-
LangResults.new(
|
80
|
+
LangResults.new(lc.clausewitz_name, checks)
|
68
81
|
end
|
69
82
|
|
70
83
|
def check_entry(checker, wordlist, key, entry)
|
@@ -73,6 +86,7 @@ module Clausewitz; module Spelling
|
|
73
86
|
# TODO: Look into supporting escaped square brackets as part of the
|
74
87
|
# string.
|
75
88
|
entry.gsub!(/\[.+\]/, '')
|
89
|
+
entry.gsub!(/\$([A-Z]|\||\d|=)+\$/, '')
|
76
90
|
|
77
91
|
# Remove other localisation bits we don't care about.
|
78
92
|
entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
|
@@ -120,7 +134,8 @@ module Clausewitz; module Spelling
|
|
120
134
|
return if is_ordinal?(word)
|
121
135
|
return if is_percentage?(word)
|
122
136
|
return if is_icon?(word)
|
123
|
-
return if
|
137
|
+
return if is_initial?(word)
|
138
|
+
return if is_psalm?(word)
|
124
139
|
return if wordlist.include?(word)
|
125
140
|
|
126
141
|
if !checker.correct?(word)
|
@@ -128,6 +143,14 @@ module Clausewitz; module Spelling
|
|
128
143
|
end
|
129
144
|
end
|
130
145
|
|
146
|
+
# Detects "chapter:verse" style tokens such as "23:4".
#
# @param word [String] token to test
# @return [Integer, nil] 0 (truthy) when the entire token is digits, a colon
#   and digits; nil otherwise
def is_psalm?(word)
  # \A and \z anchor the whole string. ^ and $ only anchor a line, which
  # would let a multi-line token through if any one line matched.
  word =~ /\A\d+:\d+\z/
end
|
149
|
+
|
150
|
+
# Detects a single capital letter followed by a full stop, e.g. "J.".
#
# @param word [String] token to test
# @return [Integer, nil] 0 (truthy) when the entire token is one uppercase
#   ASCII letter plus '.'; nil otherwise
def is_initial?(word)
  # \A and \z anchor the whole string. ^ and $ only anchor a line, which
  # would let a multi-line token through if any one line matched.
  word =~ /\A[A-Z]\.\z/
end
|
153
|
+
|
131
154
|
def suggest_words(checker, wordlist, word)
|
132
155
|
return [] if word.size < 3
|
133
156
|
|
@@ -151,10 +174,6 @@ module Clausewitz; module Spelling
|
|
151
174
|
word =~ /^£\w+/
|
152
175
|
end
|
153
176
|
|
154
|
-
def is_define?(word)
|
155
|
-
word =~ /^\$(\w|\|)+\$/
|
156
|
-
end
|
157
|
-
|
158
177
|
# Reports whether +word+ can be parsed as a number by Kernel#Float.
#
# Only the errors Float() raises for unparseable input are rescued, instead
# of using a blanket inline `rescue` that would hide any StandardError.
#
# @param word [String, nil] token to test
# @return [Boolean] true when Float(word) succeeds, false otherwise
def is_number?(word)
  Float(word)
  true
rescue ArgumentError, TypeError
  # ArgumentError: not a numeric string; TypeError: nil or unconvertible.
  false
end
|
@@ -171,15 +190,16 @@ module Clausewitz; module Spelling
|
|
171
190
|
# Loads our custom wordlist into a temporary Aspell dictionary.
|
172
191
|
# This way Aspell won't yell at us for custom words and will also
|
173
192
|
# potentially select from this list as suggestions for misspelled words.
|
174
|
-
def load_custom_dictionary(
|
175
|
-
dir = Dir.mktmpdir("custom-wordlist-#{
|
176
|
-
output = File.join(dir, "#{
|
193
|
+
def load_custom_dictionary(lc)
|
194
|
+
dir = Dir.mktmpdir("custom-wordlist-#{lc.full_name}-")
|
195
|
+
output = File.join(dir, "#{lc.full_name}-custom.wlst")
|
177
196
|
cmd = %W[
|
178
|
-
aspell --lang=#{
|
197
|
+
aspell --lang=#{lc.base} --encoding=UTF-8 create master #{output}
|
179
198
|
]
|
180
199
|
value = nil
|
181
200
|
Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
|
182
|
-
|
201
|
+
dict_path = File.join(@dictionary_root, lc.full_name, 'dict.txt')
|
202
|
+
contents = File.read(dict_path)
|
183
203
|
words = contents.lines.map(&:chomp)
|
184
204
|
words.each do |word|
|
185
205
|
stdin.puts(word)
|
@@ -188,36 +208,44 @@ module Clausewitz; module Spelling
|
|
188
208
|
value = wait_thr.value
|
189
209
|
end
|
190
210
|
unless value.success?
|
191
|
-
fail("Could not generate custom word list for #{
|
211
|
+
fail("Could not generate custom word list for #{lc.full_name}!")
|
192
212
|
end
|
193
213
|
output
|
194
214
|
end
|
195
215
|
|
196
|
-
def load_aspell_checker(
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
216
|
+
# Returns the Aspell speller for a language configuration, building it on
# first use and caching it in @loaded_spellcheckers under lc.full_name.
#
# A freshly built speller uses UTF-8 and has 'ignore-accents' enabled. When
# a custom word list exists for the language under @dictionary_root, it is
# compiled with load_custom_dictionary and attached via 'extra-dicts'.
#
# @param lc [#full_name] language configuration
# @return [FFI::Aspell::Speller] the cached or newly created speller
def load_aspell_checker(lc)
  lang = lc.full_name
  cached = @loaded_spellcheckers[lang]
  return cached if cached

  aspell_checker = FFI::Aspell::Speller.new(lang, encoding: 'UTF-8')
  aspell_checker.set('ignore-accents', true)
  # load_custom_dictionary reads <root>/<lang>/dict.txt unconditionally, so
  # test for that file rather than only its directory; a directory without
  # dict.txt then simply means "no custom words" (as in load_wordlist).
  if @dictionary_root && @dictionary_root.join(lang, 'dict.txt').exist?
    custom_words = load_custom_dictionary(lc)
    aspell_checker.set('extra-dicts', custom_words)
  end
  @loaded_spellcheckers[lang] = aspell_checker
end
|
230
|
+
|
231
|
+
# Returns the custom word list for a language configuration as a Set,
# reading it on first use and caching it in @loaded_wordlists under
# lc.full_name.
#
# Words come from <@dictionary_root>/<lc.full_name>/dict.txt, one per line.
# When no dictionary root is configured or the file does not exist, the
# result is an empty Set.
#
# @param lc [#full_name] language configuration
# @return [Set<String>] the cached or newly loaded words
def load_wordlist(lc)
  lang = lc.full_name
  cached = @loaded_wordlists[lang]
  return cached if cached

  contents = ''
  # Check for a nil root before building the path: calling #join on a nil
  # @dictionary_root would raise before any guard could run.
  if @dictionary_root
    dict_path = @dictionary_root.join(lang, 'dict.txt')
    contents = File.read(dict_path) if dict_path.exist?
  end
  words = contents.lines.map(&:chomp)
  @loaded_wordlists[lang] = Set.new(words)
end
|
210
241
|
|
211
|
-
def
|
212
|
-
|
213
|
-
aspell_lang_config = Localisation::LANG_MAP.
|
214
|
-
|
215
|
-
end.first
|
216
|
-
lang_code = aspell_lang_config.last[:base]
|
217
|
-
if @dictionary_root && @dictionary_root.join(lang_code).exist?
|
218
|
-
contents = File.read(@dictionary_root.join(lang_code, 'dict.txt'))
|
242
|
+
# Looks up the configuration for a language in Localisation::LANG_MAP.
#
# The name may be given with or without the 'l_' prefix ('english' and
# 'l_english' are equivalent) and as a String or Symbol.
#
# @param language_name [String, Symbol] language name or Clausewitz key
# @return [Object] the LANG_MAP value stored under the 'l_'-prefixed key
# @raise [RuntimeError] when LANG_MAP has no entry for the language
def language_config(language_name)
  language_name = language_name.to_s
  language_name = "l_#{language_name}" unless language_name.start_with?('l_')
  # LANG_MAP is keyed by exactly this string, so a direct hash lookup
  # replaces scanning every entry for an equal key.
  config = Localisation::LANG_MAP[language_name]
  fail("Unknown language '#{language_name}'!") unless config
  config
end
|
222
250
|
|
223
251
|
# Make sure a file to be checked is actually present and readable.
|