kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,295 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "resource_bundle"
|
|
4
|
+
require_relative "cache/language_cache"
|
|
5
|
+
require_relative "cache/frequency_cache"
|
|
6
|
+
require_relative "cache/model_cache"
|
|
7
|
+
require_relative "dictionary/hunspell"
|
|
8
|
+
require_relative "core/exceptions"
|
|
9
|
+
|
|
10
|
+
module Kotoshu
|
|
11
|
+
# Two-stage resource resolution.
|
|
12
|
+
#
|
|
13
|
+
# Stage 1 — setup (slow, network-required, explicit):
|
|
14
|
+
# Kotoshu.setup(:en) # download from kotoshu/dictionaries
|
|
15
|
+
# Kotoshu.setup(:en, want: %i[spelling frequency])
|
|
16
|
+
# Kotoshu.setup(:en, aff: "/path/to/en.aff", dic: "/path/to/en.dic") # local files
|
|
17
|
+
# Kotoshu.setup(:en, from: "/path/to/dict/dir/") # local directory
|
|
18
|
+
#
|
|
19
|
+
# Stage 2 — resolve (instant, cache-only, raises on miss):
|
|
20
|
+
# bundle = Kotoshu::ResourceManager.resolve(language: "en")
|
|
21
|
+
# bundle.dictionary # => #<Dictionary::Hunspell ...>
|
|
22
|
+
#
|
|
23
|
+
# The hot path (Kotoshu.correct?, .check, .suggest, .spellchecker_for) calls
|
|
24
|
+
# resolve and lets ResourceNotSetupError propagate. Setup is never implicit.
|
|
25
|
+
class ResourceManager
|
|
26
|
+
DEFAULT_WANT = %i[spelling].freeze
|
|
27
|
+
|
|
28
|
+
# Outcome of one setup run: the language plus one status per resource kind.
#
# The status members hold one of the symbols listed beside them, or nil when
# no status was recorded for that resource.
SetupResult = Struct.new(
  :language,
  :spelling,  # :downloaded | :local | :cached | nil
  :frequency, # :downloaded | :local | :cached | :unavailable | nil
  :model,     # :downloaded | :cached | :unavailable | nil
  :source,    # :kotoshu | :local
  keyword_init: true
) do
  # Whether at least one resource ended up usable.
  #
  # nil and :unavailable both mean "nothing usable for this resource", so
  # neither counts. The model status is considered alongside spelling and
  # frequency so that a model-only setup can succeed.
  #
  # @return [Boolean]
  def success?
    [spelling, frequency, model].any? { |status| status && status != :unavailable }
  end
end
|
|
40
|
+
|
|
41
|
+
# Class-level shortcuts. Each one builds a fresh instance with `new` and
# forwards its arguments to the instance method of the same name.
class << self
  # @param language [String, Symbol] language code
  # @param want [Array<Symbol>] resource kinds to set up
  # @param force [Boolean] forwarded unchanged
  # @param strict [Boolean] forwarded unchanged
  # @param opts [Hash] extra keywords passed through to the instance #setup
  # @return whatever the instance #setup returns
  def setup(language, want: DEFAULT_WANT, force: false, strict: false, **opts)
    new.setup(language: language, want: want, force: force, strict: strict, **opts)
  end

  # aff:/dic: are optional and from: is accepted so that this entry point
  # can express the same "local directory" setup the instance method takes.
  #
  # @param language [String, Symbol] language code
  # @param aff [String, nil] path to the .aff file
  # @param dic [String, nil] path to the .dic file
  # @param from [String, nil] directory to look the files up in
  # @param frequency [String, nil] path to a frequency file
  # @param force [Boolean] forwarded unchanged
  # @return whatever the instance #setup_from_local returns
  def setup_from_local(language:, aff: nil, dic: nil, from: nil, frequency: nil, force: false)
    new.setup_from_local(language: language, aff: aff, dic: dic, from: from,
                         frequency: frequency, force: force)
  end

  # @param language [String, Symbol] language code
  # @param want [Array<Symbol>] resource kinds to resolve
  # @return whatever the instance #resolve returns
  def resolve(language:, want: DEFAULT_WANT)
    new.resolve(language: language, want: want)
  end

  # @param language [String, Symbol] language code
  # @param resource [Symbol, String, nil] resource kind to ask about
  # @return whatever the instance #setup? returns
  def setup?(language, resource: nil)
    new.setup?(language, resource: resource)
  end

  # @return whatever the instance #languages_setup returns
  def languages_setup
    new.languages_setup
  end
end
|
|
62
|
+
|
|
63
|
+
# ---- Stage 1: setup ----
|
|
64
|
+
|
|
65
|
+
# Stage 1 entry point: choose between the local and the remote setup path.
#
# When any of aff:, dic: or from: is given the work goes to
# #setup_from_local; otherwise it goes to #setup_from_remote.
#
# @param language [String, Symbol] language code; normalized first
# @param want [Array<Symbol>] resource kinds (used by the remote path only)
# @param force [Boolean] forwarded to either path
# @param strict [Boolean] forwarded to the remote path only
# @param aff [String, nil] path to a local .aff file
# @param dic [String, nil] path to a local .dic file
# @param from [String, nil] local directory to look the files up in
# @param frequency [String, nil] local frequency file (local path only)
# @return whatever the chosen path returns
def setup(language:, want: DEFAULT_WANT, force: false, strict: false,
          aff: nil, dic: nil, from: nil, frequency: nil)
  code = normalize_language(language)
  local_sources = [aff, dic, from]
  return setup_from_remote(code, want: want, force: force, strict: strict) if local_sources.none?

  setup_from_local(language: code, aff: aff, dic: dic, from: from,
                   frequency: frequency, force: force)
end
|
|
76
|
+
|
|
77
|
+
# Install a local .aff/.dic pair (and optionally a frequency file) through
# the caches' install_local.
#
# @param language [String, Symbol] language code; normalized first
# @param aff [String, nil] path to the .aff file (may be omitted with from:)
# @param dic [String, nil] path to the .dic file (may be omitted with from:)
# @param from [String, nil] directory passed to #resolve_local_paths
# @param frequency [String, nil] path to a frequency file to install too
# @param force [Boolean] forwarded to install_local
# @return [SetupResult] spelling :local, source :local; frequency is :local
#   when installed, :unavailable when the frequency cache has no
#   install_local, nil when no frequency file was given
# @raise [ArgumentError] when the sources are incomplete or a file is missing
def setup_from_local(language:, aff: nil, dic: nil, from: nil, frequency: nil, force: false)
  lang = normalize_language(language)

  unless from || (aff && dic)
    raise ArgumentError, "both aff: and dic: are required unless from: is given"
  end

  aff_path, dic_path = resolve_local_paths(lang, aff: aff, dic: dic, from: from)
  raise ArgumentError, "aff file not found: #{aff_path}" unless File.exist?(aff_path)
  raise ArgumentError, "dic file not found: #{dic_path}" unless File.exist?(dic_path)
  # Check every input before touching a cache so that a bad frequency path
  # cannot leave the spelling files installed and the call failed.
  raise ArgumentError, "frequency file not found: #{frequency}" if frequency && !File.exist?(frequency)

  spelling_cache_for(lang).install_local(lang, aff: aff_path, dic: dic_path, force: force)

  frequency_status = nil
  if frequency
    freq_cache = frequency_cache_for
    if freq_cache.respond_to?(:install_local)
      freq_cache.install_local(lang, path: frequency, force: force)
      frequency_status = :local
    else
      # Nothing was installed, so do not report :local.
      frequency_status = :unavailable
    end
  end

  SetupResult.new(
    language: lang,
    spelling: :local,
    frequency: frequency_status,
    model: nil,
    source: :local
  )
end
|
|
104
|
+
|
|
105
|
+
# ---- Stage 2: resolve (cache-only) ----
|
|
106
|
+
|
|
107
|
+
# Stage 2 entry point: build a ResourceBundle from the per-resource
# resolve_*_cached helpers.
#
# Only the kinds listed in want are looked up; the others are nil in the
# bundle.
#
# @param language [String, Symbol] language code; normalized first
# @param want [Array<Symbol>] any of :spelling, :frequency, :model
# @return [ResourceBundle] with cached: true, rules: nil and no source URLs
def resolve(language:, want: DEFAULT_WANT)
  code = normalize_language(language)

  dictionary = (resolve_spelling_cached(code) if want.include?(:spelling))
  frequencies = (resolve_frequency_cached(code) if want.include?(:frequency))
  embedding_model = (resolve_model_cached(code) if want.include?(:model))

  ResourceBundle.new(
    language: code,
    dictionary: dictionary,
    frequency: frequencies,
    model: embedding_model,
    rules: nil,
    cached: true,
    source_urls: []
  )
end
|
|
124
|
+
|
|
125
|
+
# ---- Predicates ----
|
|
126
|
+
|
|
127
|
+
# Ask the relevant cache whether a resource is available for a language.
#
# @param language [String, Symbol] language code; normalized first
# @param resource [Symbol, String, nil] :spelling (also the default when
#   nil), :frequency or :model; any other kind yields false
# @return the cache's answer for the requested kind, false for unknown kinds
def setup?(language, resource: nil)
  code = normalize_language(language)
  kind = resource&.to_sym

  if kind.nil? || kind == :spelling
    spelling_cache_for(code).available?("#{code}:spelling")
  elsif kind == :frequency
    freq = frequency_cache_for
    freq.respond_to?(:supports_resource?) && freq.supports_resource?(code) && freq.available?(code)
  elsif kind == :model
    model_cache_for.available?("#{code}:onnx")
  else
    false
  end
end
|
|
141
|
+
|
|
142
|
+
# List the language codes found in the spelling cache's cached resources.
#
# Each cached resource id is reduced to the part before its first ":".
#
# @return [Array<String>] unique codes in sorted order
def languages_setup
  resource_ids = spelling_cache_for(nil).cached_resources
  codes = resource_ids.map { |id| id.to_s.split(":")[0] }
  codes.uniq.sort
end
|
|
148
|
+
|
|
149
|
+
private
|
|
150
|
+
|
|
151
|
+
# Remote setup path: handle each wanted resource kind in turn and collect
# the statuses into a SetupResult with source :kotoshu.
#
# Spelling is handled inline: it is :cached when the cache already has
# "<lang>:spelling" and force is off, otherwise get_spelling is called and
# the status is :downloaded. Frequency and model are delegated to their
# own helpers. Kinds not listed in want keep a nil status.
#
# @param lang [String] normalized language code
# @param want [Array<Symbol>] resource kinds to set up
# @param force [Boolean] fetch even when already cached
# @param strict [Boolean] forwarded to the frequency and model helpers
# @return [SetupResult]
def setup_from_remote(lang, want:, force:, strict:)
  config = Configuration.instance

  spelling_status =
    if want.include?(:spelling)
      store = spelling_cache_for(lang, config: config)
      if store.available?("#{lang}:spelling") && !force
        :cached
      else
        warn "[#{lang}] downloading spelling dictionary..." unless quiet?
        store.get_spelling(lang, force_download: force)
        :downloaded
      end
    end

  frequency_status =
    (setup_frequency_remote(lang, force: force, strict: strict, config: config) if want.include?(:frequency))

  model_status =
    (setup_model_remote(lang, want: want, force: force, strict: strict, config: config) if want.include?(:model))

  SetupResult.new(
    language: lang,
    spelling: spelling_status,
    frequency: frequency_status,
    model: model_status,
    source: :kotoshu
  )
end
|
|
185
|
+
|
|
186
|
+
# Make frequency data available for a language through the frequency cache.
#
# @param lang [String] normalized language code
# @param force [Boolean] fetch even when already cached
# @param strict [Boolean] re-raise errors instead of degrading to :unavailable
# @param config [Object, nil] configuration handed to #frequency_cache_for
# @return [Symbol] :cached, :downloaded, or :unavailable
# @raise [StandardError] any error from the cache, only when strict is true
def setup_frequency_remote(lang, force:, strict:, config:)
  cache = frequency_cache_for(config: config)
  return :unavailable unless cache.respond_to?(:supports_resource?) && cache.supports_resource?(lang)
  return :cached if cache.available?(lang) && !force
  # A cache that cannot fetch has downloaded nothing, so do not claim
  # :downloaded for it.
  return :unavailable unless cache.respond_to?(:get)

  warn "[#{lang}] downloading frequency data..." unless quiet?
  cache.get(lang, force_download: force)
  :downloaded
rescue StandardError => e
  raise if strict

  warn "[#{lang}] frequency data unavailable: #{e.class} (#{e.message})" unless quiet?
  :unavailable
end
|
|
202
|
+
|
|
203
|
+
# Make the ONNX model for a language available through the model cache.
#
# Languages without an entry in Cache::ModelCache::AVAILABLE_MODELS[:onnx]
# are reported as :unavailable without consulting the cache.
#
# @param lang [String] normalized language code
# @param want [Array<Symbol>] accepted but not read by this method
# @param force [Boolean] fetch even when already cached
# @param strict [Boolean] re-raise errors instead of degrading to :unavailable
# @param config [Object, nil] configuration handed to #model_cache_for
# @return [Symbol] :cached, :downloaded, or :unavailable
def setup_model_remote(lang, want:, force:, strict:, config:)
  if Cache::ModelCache::AVAILABLE_MODELS[:onnx].key?(lang.to_sym)
    store = model_cache_for(config: config)
    model_id = "#{lang}:onnx"

    if store.available?(model_id) && !force
      :cached
    else
      warn "[#{lang}] downloading ONNX model..." unless quiet?
      store.get(model_id, force_download: force)
      :downloaded
    end
  else
    :unavailable
  end
rescue StandardError => failure
  raise if strict

  warn "[#{lang}] ONNX model unavailable: #{failure.class} (#{failure.message})" unless quiet?
  :unavailable
end
|
|
220
|
+
|
|
221
|
+
# Build a Hunspell dictionary from the cached spelling entry of a language.
#
# The entry comes from the cache's get, falling back to load_cached; its
# dic_path/aff_path may be stored under symbol or string keys.
#
# @param lang [String] normalized language code
# @return [Dictionary::Hunspell]
# @raise [ResourceNotSetupError] when the cache reports the resource as not
#   available, or yields no entry for it
def resolve_spelling_cached(lang)
  store = spelling_cache_for(lang)
  key = "#{lang}:spelling"

  entry = store.available?(key) ? (store.get(key) || store.load_cached(key)) : nil
  raise ResourceNotSetupError.new(lang, "spelling") unless entry

  dic, aff = %w[dic_path aff_path].map { |name| entry[name.to_sym] || entry[name] }
  Dictionary::Hunspell.new(dic_path: dic, aff_path: aff, language_code: lang)
end
|
|
235
|
+
|
|
236
|
+
# Fetch cached frequency data for a language.
#
# @param lang [String] normalized language code
# @return the cache's entry; nil when the cache does not support the
#   language, or when reading the entry raises a StandardError
# @raise [ResourceNotSetupError] when the language is supported but the
#   cache reports it as not available
def resolve_frequency_cached(lang)
  store = frequency_cache_for
  supported = store.respond_to?(:supports_resource?) && store.supports_resource?(lang)
  return nil unless supported

  raise ResourceNotSetupError.new(lang, "frequency") unless store.available?(lang)

  begin
    store.get(lang)
  rescue StandardError
    # Best effort: a read failure yields no frequency data.
    nil
  end
end
|
|
243
|
+
|
|
244
|
+
# Fetch the cached ONNX model entry for a language.
#
# @param lang [String] normalized language code
# @return the cache's entry; nil when the language has no entry in
#   Cache::ModelCache::AVAILABLE_MODELS[:onnx], or when reading raises a
#   StandardError
# @raise [ResourceNotSetupError] when a model exists for the language but
#   the cache reports it as not available
def resolve_model_cached(lang)
  store = model_cache_for
  return nil unless Cache::ModelCache::AVAILABLE_MODELS[:onnx].key?(lang.to_sym)

  model_id = "#{lang}:onnx"
  raise ResourceNotSetupError.new(lang, "model") unless store.available?(model_id)

  begin
    store.get(model_id)
  rescue StandardError
    # Best effort: a read failure yields no model.
    nil
  end
end
|
|
252
|
+
|
|
253
|
+
# Work out the absolute .aff and .dic paths for a local setup.
#
# An explicitly given aff/dic path always wins and is expanded to an
# absolute path. A missing one is looked up as "<lang>.aff" / "<lang>.dic"
# inside the from directory.
#
# @param lang [String] language code used for the default file names
# @param aff [String, nil] explicit path to the .aff file
# @param dic [String, nil] explicit path to the .dic file
# @param from [String, nil] directory to fall back to
# @return [Array(String, String)] [aff_path, dic_path]
# @raise [ArgumentError] when a path is missing and there is no from
#   directory to fall back to
def resolve_local_paths(lang, aff:, dic:, from:)
  dir = from && File.expand_path(from)

  # Without a directory both files must be named explicitly; failing here
  # avoids an opaque TypeError from File.expand_path(nil).
  if dir.nil? && (aff.nil? || dic.nil?)
    raise ArgumentError, "both aff: and dic: are required unless from: is given"
  end

  aff_path = aff ? File.expand_path(aff) : File.join(dir, "#{lang}.aff")
  dic_path = dic ? File.expand_path(dic) : File.join(dir, "#{lang}.dic")
  [aff_path, dic_path]
end
|
|
263
|
+
|
|
264
|
+
# Reduce a language/locale code to its lower-cased primary subtag,
# e.g. "en-US" -> "en", :pt_BR -> "pt".
#
# @param code [String, Symbol, #to_s] language or locale code
# @return [String] the part before the first "-" or "_", lower-cased
# @raise [ArgumentError] when there is no primary subtag (nil, empty, or a
#   code starting with a separator)
def normalize_language(code)
  primary = code.to_s.strip[/\A[^-_]+/]
  raise ArgumentError, "invalid language code: #{code.inspect}" if primary.nil?

  primary.downcase
end
|
|
267
|
+
|
|
268
|
+
# Build a fresh LanguageCache from the configuration's cache path and
# resource pin.
#
# @param _lang [String, nil] accepted but not used
# @param config [Object, nil] configuration to read; Configuration.instance
#   when nil
# @return [Cache::LanguageCache]
def spelling_cache_for(_lang = nil, config: nil)
  config ||= Configuration.instance
  settings = { cache_path: config.cache_path, resource_pin: config.resource_pin }
  Cache::LanguageCache.new(**settings)
end
|
|
275
|
+
|
|
276
|
+
# Build a fresh FrequencyCache from the configuration's cache path and
# resource pin.
#
# @param config [Object, nil] configuration to read; Configuration.instance
#   when nil
# @return [Cache::FrequencyCache]
def frequency_cache_for(config: nil)
  config ||= Configuration.instance
  settings = { cache_path: config.cache_path, resource_pin: config.resource_pin }
  Cache::FrequencyCache.new(**settings)
end
|
|
283
|
+
|
|
284
|
+
# Build a fresh ModelCache from the configuration's cache path.
#
# @param config [Object, nil] configuration to read; Configuration.instance
#   when nil
# @return [Cache::ModelCache]
def model_cache_for(config: nil)
  config ||= Configuration.instance
  Cache::ModelCache.new(cache_path: config.cache_path)
end
|
|
290
|
+
|
|
291
|
+
# Whether progress messages should be suppressed.
#
# @return [Boolean] true when $stderr is not a TTY, or when the
#   KOTOSHU_QUIET environment variable is exactly "1"
def quiet?
  return true unless $stderr.tty?

  ENV["KOTOSHU_QUIET"] == "1"
end
|
|
294
|
+
end
|
|
295
|
+
end
|
|
@@ -0,0 +1,165 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
|
|
4
|
+
module Results
|
|
5
|
+
# Result pattern for explicit error handling.
|
|
6
|
+
#
|
|
7
|
+
# Provides a type-safe way to handle operations that can fail
|
|
8
|
+
# without using exceptions. Based on functional programming patterns.
|
|
9
|
+
#
|
|
10
|
+
# @example Using Success
|
|
11
|
+
# result = Result::Success.new("value")
|
|
12
|
+
# result.success? # => true
|
|
13
|
+
# result.value # => "value"
|
|
14
|
+
#
|
|
15
|
+
# @example Using Failure
|
|
16
|
+
# result = Result::Failure.new(error)
|
|
17
|
+
# result.failure? # => true
|
|
18
|
+
# result.error # => the error
|
|
19
|
+
#
|
|
20
|
+
# @example Chaining operations
|
|
21
|
+
# result = Result::Success.new(5)
|
|
22
|
+
# .and_then { |v| Success.new(v * 2) } # Only called if success
|
|
23
|
+
# .or_else { |e| Success.new(0) } # Only called if failure
|
|
24
|
+
module Result
  # Abstract parent of Success and Failure.
  #
  # Holds the combinators shared by both outcomes; the concrete subclasses
  # supply #value and #error.
  #
  # @abstract
  class Base
    # @return [Boolean] true when this result is a Success
    def success?
      is_a?(Success)
    end

    # @return [Boolean] true when this result is a Failure
    def failure?
      is_a?(Failure)
    end

    # Transform the wrapped value.
    #
    # A Failure is returned untouched. A StandardError raised by the block
    # is captured and returned as a Failure.
    #
    # @yield [value] the wrapped value
    # @return [Success, Failure]
    def map
      return self if failure?

      Success.new(yield(value))
    rescue StandardError => e
      Failure.new(e)
    end

    # Chain another step that may itself fail.
    #
    # A Failure is returned untouched. When the block returns something
    # that is not a result it is wrapped in a Success; a StandardError
    # raised by the block is captured and returned as a Failure.
    #
    # @yield [value] the wrapped value
    # @return [Success, Failure]
    def and_then
      return self if failure?

      outcome = yield(value)
      outcome.is_a?(Base) ? outcome : Success.new(outcome)
    rescue StandardError => e
      Failure.new(e)
    end

    # Recover from a failure.
    #
    # A Success is returned untouched. When the block returns something
    # that is not a result it is wrapped in a Success. Unlike #map and
    # #and_then, errors raised by the block are not captured here.
    #
    # @yield [error] the wrapped error
    # @return [Success, Failure]
    def or_else
      return self if success?

      outcome = yield(error)
      outcome.is_a?(Base) ? outcome : Success.new(outcome)
    end

    # Return the wrapped value, or raise the wrapped error.
    #
    # @return [Object] the wrapped value
    # @raise the wrapped error when it is something Kernel#raise accepts
    #   (an exception class/instance, or a String raised as RuntimeError)
    # @raise [RuntimeError] describing the payload for any other error
    def unwrap
      return value if success?

      failure = error
      # Kernel#raise rejects arbitrary objects (Symbol, nil, Hash, ...) with
      # a TypeError that hides the real payload, so those are wrapped below.
      raise failure if failure.respond_to?(:exception) || failure.is_a?(String)

      raise "unwrapped a Failure holding #{failure.inspect}"
    end

    # The wrapped value; implemented by the subclasses.
    #
    # @return [Object, nil]
    def value
      raise NotImplementedError
    end

    # The wrapped error; implemented by the subclasses.
    #
    # @return [StandardError, nil]
    def error
      raise NotImplementedError
    end
  end

  # Result of an operation that succeeded.
  class Success < Base
    # @return [Object] the wrapped value
    attr_reader :value

    # @param value [Object] the value to wrap
    def initialize(value)
      @value = value
    end

    # @return [nil] a Success carries no error
    def error
      nil
    end
  end

  # Result of an operation that failed.
  class Failure < Base
    # @return [StandardError] the wrapped error
    attr_reader :error

    # @param error [StandardError] the error to wrap
    def initialize(error)
      @error = error
    end

    # @return [Failure] self; the block is never called
    def map
      self
    end

    # @return [Failure] self; the block is never called
    def and_then
      self
    end

    # @return [nil] a Failure carries no value
    def value
      nil
    end
  end
end
|
|
164
|
+
end
|
|
165
|
+
end
|