clausewitz-spelling 0.1.17 → 0.1.18
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/exe/clausewitz-spellcheck +5 -0
- data/lib/clausewitz/localisation.rb +52 -19
- data/lib/clausewitz/spelling/checker.rb +66 -38
- data/lib/clausewitz/spelling/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 45dfa3f0e350c7b4b3ba9d1a6f69c970cb884134
|
4
|
+
data.tar.gz: c4ffb8579ab8787115d447b11fa49cb4a3159569
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4661cea315ffec778213bf3b67ea6322973d19ce5a6302a23b2f8afef4eb1231583df251b7b098274bfd7b32b9d3f40cf01bca6cc65efd87bc3bd8581cd73e69
|
7
|
+
data.tar.gz: 44c9608f76e094a80d7ffd4924641ab65ede46b46a10a99daad2b70a4f7c9d0934df1f6b243a84deb23a71a9e21b2734e6205012f42d8c173e5cda9883bc6f97
|
data/Gemfile.lock
CHANGED
data/exe/clausewitz-spellcheck
CHANGED
@@ -15,6 +15,11 @@ class Main
|
|
15
15
|
opt :suggestion_count,
|
16
16
|
"How many suggestions to display",
|
17
17
|
type: :int
|
18
|
+
Clausewitz::Localisation::LANG_MAP.each do |_, config|
|
19
|
+
opt "#{config.name}_dialect".to_sym,
|
20
|
+
"Select dialect for #{config.name.capitalize}",
|
21
|
+
type: :string
|
22
|
+
end
|
18
23
|
end
|
19
24
|
[opts, args]
|
20
25
|
end
|
@@ -2,25 +2,58 @@ require 'yaml'
|
|
2
2
|
|
3
3
|
module Clausewitz
|
4
4
|
module Localisation
|
5
|
+
class LangConfig
|
6
|
+
attr_reader :name, :base, :dialects, :default_dialect
|
7
|
+
def initialize(name, base, dialects, default_dialect = nil)
|
8
|
+
@name = name
|
9
|
+
@base = base
|
10
|
+
@dialects = dialects
|
11
|
+
@default_dialect = default_dialect
|
12
|
+
@selected_dialect = @default_dialect
|
13
|
+
end
|
14
|
+
|
15
|
+
def clausewitz_name
|
16
|
+
"l_#{@name}"
|
17
|
+
end
|
18
|
+
|
19
|
+
def select_dialect(dialect)
|
20
|
+
if @dialects.include?(dialect)
|
21
|
+
@selected_dialect = dialect
|
22
|
+
else
|
23
|
+
fail("Unknown dialect override '#{dialect}'!")
|
24
|
+
end
|
25
|
+
end
|
26
|
+
|
27
|
+
def full_name
|
28
|
+
@selected_dialect ? "#{@base}_#{@selected_dialect.upcase}" : @base
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
5
32
|
LANG_MAP = {
|
6
|
-
'
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
'
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
33
|
+
'l_english' => LangConfig.new(
|
34
|
+
'english',
|
35
|
+
'en', %w[gb us ca], 'gb'
|
36
|
+
),
|
37
|
+
'l_french' => LangConfig.new(
|
38
|
+
'french',
|
39
|
+
'fr', %w[fr ca], 'fr'
|
40
|
+
),
|
41
|
+
'l_german' => LangConfig.new(
|
42
|
+
'german',
|
43
|
+
'de', %w[de], 'de'
|
44
|
+
),
|
45
|
+
'l_portuguese' => LangConfig.new(
|
46
|
+
'portuguese',
|
47
|
+
'pt', %w[pt br], 'pt'
|
48
|
+
),
|
49
|
+
'l_spanish' => LangConfig.new(
|
50
|
+
'spanish',
|
51
|
+
'es', []
|
52
|
+
),
|
53
|
+
'l_russian' => LangConfig.new(
|
54
|
+
'russian',
|
55
|
+
'ru', []
|
56
|
+
)
|
24
57
|
}
|
25
58
|
|
26
59
|
def self.parse(text)
|
@@ -56,7 +89,7 @@ module Clausewitz
|
|
56
89
|
end
|
57
90
|
end
|
58
91
|
|
59
|
-
VALID_LANG_REGEX =
|
92
|
+
VALID_LANG_REGEX = /(#{LANG_MAP.keys.join('|')})/
|
60
93
|
def self.valid_lang?(lang)
|
61
94
|
lang =~ VALID_LANG_REGEX
|
62
95
|
end
|
@@ -20,6 +20,16 @@ module Clausewitz; module Spelling
|
|
20
20
|
@dictionary_root = Pathname.new(@dictionary_root)
|
21
21
|
end
|
22
22
|
|
23
|
+
dialect_opts = opts.keys.select { |k| k =~ /.+_dialect/ }
|
24
|
+
dialect_opts.each do |dialect_opt_key|
|
25
|
+
next unless opts[dialect_opt_key]
|
26
|
+
next if dialect_opt_key.to_s.end_with?('given')
|
27
|
+
language_name = dialect_opt_key[/(.+)_dialect/, 1]
|
28
|
+
config = language_config(language_name)
|
29
|
+
dialect = opts[dialect_opt_key]
|
30
|
+
config.select_dialect(dialect.downcase)
|
31
|
+
end
|
32
|
+
|
23
33
|
@loaded_spellcheckers = {}
|
24
34
|
@loaded_wordlists = {}
|
25
35
|
end
|
@@ -41,22 +51,25 @@ module Clausewitz; module Spelling
|
|
41
51
|
return UnparseableFileResult.new(filepath, e)
|
42
52
|
end
|
43
53
|
|
44
|
-
checks = contents.map do |
|
45
|
-
|
54
|
+
checks = contents.map do |lang_name, entries|
|
55
|
+
lc = language_config(lang_name)
|
56
|
+
check_entries(lc, entries)
|
46
57
|
end
|
47
58
|
FileResults.new(filepath, checks)
|
48
59
|
end
|
49
60
|
|
50
61
|
private
|
51
62
|
|
52
|
-
def check_entries(
|
53
|
-
wordlist = load_wordlist(
|
54
|
-
aspell_checker = load_aspell_checker(
|
63
|
+
def check_entries(lc, entries)
|
64
|
+
wordlist = load_wordlist(lc)
|
65
|
+
aspell_checker = load_aspell_checker(lc)
|
55
66
|
spellcheck_ignore = entries&.delete('spellcheck_ignore')
|
56
67
|
ignored_keys = spellcheck_ignore ? spellcheck_ignore.split(',') : []
|
57
68
|
ignored_keys << 'spellcheck_ignore'
|
58
|
-
|
59
|
-
|
69
|
+
if ignored_keys.include?('all')
|
70
|
+
return IgnoredLangResult.new(lc.clausewitz_name)
|
71
|
+
end
|
72
|
+
return LangResults.new(lc.clausewitz_name, []) unless entries
|
60
73
|
checks = entries.map do |key, entry|
|
61
74
|
if ignored_keys.include?(key)
|
62
75
|
IgnoredEntryResult.new(key)
|
@@ -64,7 +77,7 @@ module Clausewitz; module Spelling
|
|
64
77
|
check_entry(aspell_checker, wordlist, key, entry)
|
65
78
|
end
|
66
79
|
end
|
67
|
-
LangResults.new(
|
80
|
+
LangResults.new(lc.clausewitz_name, checks)
|
68
81
|
end
|
69
82
|
|
70
83
|
def check_entry(checker, wordlist, key, entry)
|
@@ -73,6 +86,7 @@ module Clausewitz; module Spelling
|
|
73
86
|
# TODO: Look into supporting escaped square brackets as part of the
|
74
87
|
# string.
|
75
88
|
entry.gsub!(/\[.+\]/, '')
|
89
|
+
entry.gsub!(/\$([A-Z]|\||\d|=)+\$/, '')
|
76
90
|
|
77
91
|
# Remove other localisation bits we don't care about.
|
78
92
|
entry.gsub!(/§(%|\*|=|\d|W|G|R|B|Y|b|M|g|T|l|H|\+|-|!)/, '')
|
@@ -120,7 +134,8 @@ module Clausewitz; module Spelling
|
|
120
134
|
return if is_ordinal?(word)
|
121
135
|
return if is_percentage?(word)
|
122
136
|
return if is_icon?(word)
|
123
|
-
return if
|
137
|
+
return if is_initial?(word)
|
138
|
+
return if is_psalm?(word)
|
124
139
|
return if wordlist.include?(word)
|
125
140
|
|
126
141
|
if !checker.correct?(word)
|
@@ -128,6 +143,14 @@ module Clausewitz; module Spelling
|
|
128
143
|
end
|
129
144
|
end
|
130
145
|
|
146
|
+
def is_psalm?(word)
|
147
|
+
word =~ /^\d+:\d+$/
|
148
|
+
end
|
149
|
+
|
150
|
+
def is_initial?(word)
|
151
|
+
word =~ /^[A-Z]\.$/
|
152
|
+
end
|
153
|
+
|
131
154
|
def suggest_words(checker, wordlist, word)
|
132
155
|
return [] if word.size < 3
|
133
156
|
|
@@ -151,10 +174,6 @@ module Clausewitz; module Spelling
|
|
151
174
|
word =~ /^£\w+/
|
152
175
|
end
|
153
176
|
|
154
|
-
def is_define?(word)
|
155
|
-
word =~ /^\$(\w|\|)+\$/
|
156
|
-
end
|
157
|
-
|
158
177
|
def is_number?(word)
|
159
178
|
Float(word) != nil rescue false
|
160
179
|
end
|
@@ -171,15 +190,16 @@ module Clausewitz; module Spelling
|
|
171
190
|
# Loads our custom wordlist into a temporary Aspell dictionary.
|
172
191
|
# This way Aspell won't yell at us for custom words and will also
|
173
192
|
# potentially select from this list as suggestions for misspelled words.
|
174
|
-
def load_custom_dictionary(
|
175
|
-
dir = Dir.mktmpdir("custom-wordlist-#{
|
176
|
-
output = File.join(dir, "#{
|
193
|
+
def load_custom_dictionary(lc)
|
194
|
+
dir = Dir.mktmpdir("custom-wordlist-#{lc.full_name}-")
|
195
|
+
output = File.join(dir, "#{lc.full_name}-custom.wlst")
|
177
196
|
cmd = %W[
|
178
|
-
aspell --lang=#{
|
197
|
+
aspell --lang=#{lc.base} --encoding=UTF-8 create master #{output}
|
179
198
|
]
|
180
199
|
value = nil
|
181
200
|
Open3.popen3(*cmd) do |stdin, stdout, stderr, wait_thr|
|
182
|
-
|
201
|
+
dict_path = File.join(@dictionary_root, lc.full_name, 'dict.txt')
|
202
|
+
contents = File.read(dict_path)
|
183
203
|
words = contents.lines.map(&:chomp)
|
184
204
|
words.each do |word|
|
185
205
|
stdin.puts(word)
|
@@ -188,36 +208,44 @@ module Clausewitz; module Spelling
|
|
188
208
|
value = wait_thr.value
|
189
209
|
end
|
190
210
|
unless value.success?
|
191
|
-
fail("Could not generate custom word list for #{
|
211
|
+
fail("Could not generate custom word list for #{lc.full_name}!")
|
192
212
|
end
|
193
213
|
output
|
194
214
|
end
|
195
215
|
|
196
|
-
def load_aspell_checker(
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
216
|
+
def load_aspell_checker(lc)
|
217
|
+
if @loaded_spellcheckers[lc.full_name]
|
218
|
+
return @loaded_spellcheckers[lc.full_name]
|
219
|
+
end
|
220
|
+
aspell_checker = FFI::Aspell::Speller.new(
|
221
|
+
lc.full_name, encoding: 'UTF-8'
|
222
|
+
)
|
203
223
|
aspell_checker.set('ignore-accents', true)
|
204
|
-
if @dictionary_root && @dictionary_root.join(
|
205
|
-
custom_words = load_custom_dictionary(
|
224
|
+
if @dictionary_root && @dictionary_root.join(lc.full_name).exist?
|
225
|
+
custom_words = load_custom_dictionary(lc)
|
206
226
|
aspell_checker.set('extra-dicts', custom_words)
|
207
227
|
end
|
208
|
-
@loaded_spellcheckers[
|
228
|
+
@loaded_spellcheckers[lc.full_name] = aspell_checker
|
229
|
+
end
|
230
|
+
|
231
|
+
def load_wordlist(lc)
|
232
|
+
return @loaded_wordlists[lc.full_name] if @loaded_wordlists[lc.full_name]
|
233
|
+
contents = ''
|
234
|
+
dict_path = @dictionary_root.join(lc.full_name, 'dict.txt')
|
235
|
+
if @dictionary_root && dict_path.exist?
|
236
|
+
contents = File.read(@dictionary_root.join(lc.full_name, 'dict.txt'))
|
237
|
+
end
|
238
|
+
words = contents.lines.to_a.map(&:chomp)
|
239
|
+
@loaded_wordlists[lc.full_name] = Set.new(words)
|
209
240
|
end
|
210
241
|
|
211
|
-
def
|
212
|
-
|
213
|
-
aspell_lang_config = Localisation::LANG_MAP.
|
214
|
-
|
215
|
-
end.first
|
216
|
-
lang_code = aspell_lang_config.last[:base]
|
217
|
-
if @dictionary_root && @dictionary_root.join(lang_code).exist?
|
218
|
-
contents = File.read(@dictionary_root.join(lang_code, 'dict.txt'))
|
242
|
+
def language_config(language_name)
|
243
|
+
language_name = "l_#{language_name}" if language_name !~ /^l_/
|
244
|
+
aspell_lang_config = Localisation::LANG_MAP.find do |config_key, _|
|
245
|
+
language_name == config_key
|
219
246
|
end
|
220
|
-
|
247
|
+
fail("Unknown language '#{language_name}'!") unless aspell_lang_config
|
248
|
+
aspell_lang_config.last
|
221
249
|
end
|
222
250
|
|
223
251
|
# Make sure a file to be checked is actually present and readable.
|