kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
data/lib/kotoshu.rb
ADDED
|
@@ -0,0 +1,493 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# EAGER: Core infrastructure
|
|
4
|
+
require_relative "kotoshu/version"
|
|
5
|
+
require_relative "kotoshu/core"
|
|
6
|
+
require_relative "kotoshu/core/models/word"
|
|
7
|
+
require_relative "kotoshu/core/models/affix_rule"
|
|
8
|
+
require_relative "kotoshu/core/models/result/word_result"
|
|
9
|
+
require_relative "kotoshu/core/models/result/document_result"
|
|
10
|
+
|
|
11
|
+
# EAGER: String metrics (used by algorithms)
|
|
12
|
+
require_relative "kotoshu/string_metrics"
|
|
13
|
+
|
|
14
|
+
# EAGER: Algorithms namespace
|
|
15
|
+
require_relative "kotoshu/algorithms"
|
|
16
|
+
|
|
17
|
+
# EAGER: Algorithms (ported from Spylls)
|
|
18
|
+
require_relative "kotoshu/algorithms/ngram_suggest"
|
|
19
|
+
require_relative "kotoshu/suggestions/suggestion"
|
|
20
|
+
require_relative "kotoshu/suggestions/suggestion_set"
|
|
21
|
+
require_relative "kotoshu/suggestions/context"
|
|
22
|
+
require_relative "kotoshu/suggestions/generator"
|
|
23
|
+
|
|
24
|
+
# EAGER: Dictionary base
|
|
25
|
+
require_relative "kotoshu/dictionary/base"
|
|
26
|
+
require_relative "kotoshu/dictionary/repository"
|
|
27
|
+
|
|
28
|
+
# EAGER: Dictionary backends (load all for now, can optimize later)
|
|
29
|
+
require_relative "kotoshu/dictionary/unix_words"
|
|
30
|
+
require_relative "kotoshu/dictionary/plain_text"
|
|
31
|
+
require_relative "kotoshu/dictionary/custom"
|
|
32
|
+
require_relative "kotoshu/dictionary/hunspell"
|
|
33
|
+
require_relative "kotoshu/dictionary/cspell"
|
|
34
|
+
|
|
35
|
+
# EAGER: Language module (multi-language support)
|
|
36
|
+
require_relative "kotoshu/language"
|
|
37
|
+
|
|
38
|
+
# EAGER: Strategy base
|
|
39
|
+
require_relative "kotoshu/suggestions/strategies/base_strategy"
|
|
40
|
+
|
|
41
|
+
# EAGER: Strategies (load all for now, can optimize later)
|
|
42
|
+
require_relative "kotoshu/suggestions/strategies/edit_distance_strategy"
|
|
43
|
+
require_relative "kotoshu/suggestions/strategies/symspell_strategy"
|
|
44
|
+
require_relative "kotoshu/suggestions/strategies/phonetic_strategy"
|
|
45
|
+
require_relative "kotoshu/suggestions/strategies/keyboard_proximity_strategy"
|
|
46
|
+
require_relative "kotoshu/suggestions/strategies/ngram_strategy"
|
|
47
|
+
require_relative "kotoshu/suggestions/strategies/composite_strategy"
|
|
48
|
+
|
|
49
|
+
# EAGER: Readers for Hunspell files
|
|
50
|
+
require_relative "kotoshu/readers"
|
|
51
|
+
|
|
52
|
+
# EAGER: Configuration and main interface
|
|
53
|
+
require_relative "kotoshu/dictionaries/catalog"
|
|
54
|
+
require_relative "kotoshu/configuration"
|
|
55
|
+
require_relative "kotoshu/spellchecker"
|
|
56
|
+
|
|
57
|
+
module Kotoshu
|
|
58
|
+
# The Kotoshu::Models namespace is opened eagerly by core/models/*.rb.
|
|
59
|
+
# Semantic/embedding model autoloads live there.
|
|
60
|
+
Models.autoload :Context, "kotoshu/models/context"
|
|
61
|
+
Models.autoload :EmbeddingModel, "kotoshu/models/embedding_model"
|
|
62
|
+
Models.autoload :FastTextModel, "kotoshu/models/fasttext_model"
|
|
63
|
+
Models.autoload :NearestNeighbor, "kotoshu/models/nearest_neighbor"
|
|
64
|
+
Models.autoload :OnnxModel, "kotoshu/models/onnx_model"
|
|
65
|
+
Models.autoload :SemanticError, "kotoshu/models/semantic_error"
|
|
66
|
+
Models.autoload :Suggestion, "kotoshu/models/suggestion"
|
|
67
|
+
Models.autoload :WordEmbedding, "kotoshu/models/word_embedding"
|
|
68
|
+
|
|
69
|
+
# LAZY: Trie components (autoload)
|
|
70
|
+
autoload :TrieNode, "kotoshu/core/trie/node"
|
|
71
|
+
autoload :Trie, "kotoshu/core/trie/trie"
|
|
72
|
+
autoload :TrieBuilder, "kotoshu/core/trie/builder"
|
|
73
|
+
|
|
74
|
+
# LAZY: Features (autoload)
|
|
75
|
+
autoload :Defaults, "kotoshu/defaults"
|
|
76
|
+
autoload :PersonalDictionary, "kotoshu/personal_dictionary"
|
|
77
|
+
autoload :ProjectConfig, "kotoshu/project_config"
|
|
78
|
+
autoload :FluentChecker, "kotoshu/fluent_checker"
|
|
79
|
+
autoload :ResourceManager, "kotoshu/resource_manager"
|
|
80
|
+
autoload :ResourceBundle, "kotoshu/resource_bundle"
|
|
81
|
+
autoload :SourceRegistry, "kotoshu/source_registry"
|
|
82
|
+
|
|
83
|
+
# LAZY: Integrity verification (autoload)
|
|
84
|
+
autoload :Integrity, "kotoshu/integrity"
|
|
85
|
+
|
|
86
|
+
# LAZY: FastText integration (autoload)
|
|
87
|
+
autoload :SemanticAnalyzer, "kotoshu/analyzers/semantic_analyzer"
|
|
88
|
+
|
|
89
|
+
# LAZY: Document abstraction (autoload)
|
|
90
|
+
autoload :Location, "kotoshu/documents/location"
|
|
91
|
+
autoload :Document, "kotoshu/documents/document"
|
|
92
|
+
autoload :PlainTextDocument, "kotoshu/documents/plain_text_document"
|
|
93
|
+
autoload :MarkdownDocument, "kotoshu/documents/markdown_document"
|
|
94
|
+
autoload :AsciidocDocument, "kotoshu/documents/asciidoc_document"
|
|
95
|
+
|
|
96
|
+
# LAZY: Cache management (autoload)
|
|
97
|
+
autoload :LanguageCache, "kotoshu/cache/language_cache"
|
|
98
|
+
autoload :ModelCache, "kotoshu/cache/model_cache"
|
|
99
|
+
|
|
100
|
+
# LAZY: Language detection (autoload)
|
|
101
|
+
autoload :LanguageIdentifier, "kotoshu/language/identifier"
|
|
102
|
+
|
|
103
|
+
# LAZY: Development tools (autoload)
|
|
104
|
+
autoload :Debug, "kotoshu/debug_mode"
|
|
105
|
+
autoload :DebugLogger, "kotoshu/debug_logger"
|
|
106
|
+
autoload :Metrics, "kotoshu/metrics_module"
|
|
107
|
+
autoload :MetricsCollector, "kotoshu/metrics_collector"
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
module Kotoshu
|
|
111
|
+
class Error < StandardError; end
|
|
112
|
+
|
|
113
|
+
autoload :Paths, "kotoshu/paths"
|
|
114
|
+
|
|
115
|
+
# Configure Kotoshu through the global configuration object.
#
# When a block is given it is yielded the global configuration so settings
# can be changed in place. Without a block this simply returns the
# configuration.
#
# @yieldparam config [Configuration] The global configuration
# @return [Configuration] The global configuration
#
# @example
#   Kotoshu.configure do |config|
#     config.dictionary_path = "/usr/share/dict/words"
#     config.language = "en-US"
#   end
def self.configure
  return configuration unless block_given?

  yield configuration
  configuration
end
|
|
128
|
+
|
|
129
|
+
# Accessor for the global configuration.
#
# Delegates to `Configuration.instance`.
#
# @return [Configuration] The global configuration
#
# @example
#   config = Kotoshu.configuration
def self.configuration
  Configuration.instance
end
|
|
138
|
+
|
|
139
|
+
# Memoized spellchecker for the configured default language.
#
# The instance is built on first use via `spellchecker_for` and reused until
# `reset_spellchecker` clears it.
#
# @return [Spellchecker] The default spellchecker
# @raise [ResourceNotSetupError] if the configured default language is nil
#   or blank (resolution of the language itself may raise as well)
def self.spellchecker
  @spellchecker ||= begin
    lang = configuration.default_language
    blank = lang.nil? || lang.to_s.empty?
    raise ResourceNotSetupError.new(lang || "default", "spelling") if blank

    spellchecker_for(lang)
  end
end
|
|
153
|
+
|
|
154
|
+
# Fetch (or lazily build) the spellchecker for one language.
#
# Spellcheckers are memoized per language, keyed by `language.to_s`, so
# `:de` and `"de"` share the same instance. On a cache miss the resource
# bundle is obtained from `ResourceManager.resolve` and wrapped in a new
# Spellchecker that uses the global configuration.
#
# @param language [String, Symbol] Language code (e.g., "en", "de", "fr")
# @return [Spellchecker] Spellchecker using a ResourceManager-resolved bundle
# @raise [ResourceNotSetupError] if the language hasn't been set up
#   (raised by the resolution step)
#
# @example
#   Kotoshu.setup(:de)
#   Kotoshu.spellchecker_for("de").correct?("Hallo") # => true
def self.spellchecker_for(language)
  cache = (@spellcheckers ||= {})
  cache_key = language.to_s

  cached = cache[cache_key]
  return cached if cached

  bundle = ResourceManager.resolve(language: language)
  cache[cache_key] = Spellchecker.new(resource_bundle: bundle, config: configuration)
end
|
|
171
|
+
|
|
172
|
+
# Resolve language resources through ResourceManager.
#
# Falls back to the configured default language when `language` is nil and
# to `ResourceManager::DEFAULT_WANT` when `want` is nil.
#
# @param language [String, Symbol, nil] Language code; if nil, uses default
# @param want [Array<Symbol>, nil] Resource types; nil selects
#   ResourceManager::DEFAULT_WANT
# @return [ResourceBundle] Resolved bundle
# @raise [ResourceNotSetupError] if no usable language is available (nil or
#   blank), or if the language hasn't been set up
#
# @example
#   Kotoshu.setup(:en)
#   bundle = Kotoshu.resolve(language: "en")
#   bundle.dictionary  # => #<Dictionary::Hunspell ...>
def self.resolve(language: nil, want: nil)
  lang = language || configuration.default_language

  # A blank language is as unusable as a missing one; reject it here with the
  # same error Kotoshu.spellchecker uses instead of handing "" to the
  # resource manager.
  raise ResourceNotSetupError.new("default", "spelling") if lang.nil? || lang.to_s.empty?

  ResourceManager.resolve(language: lang, want: want || ResourceManager::DEFAULT_WANT)
end
|
|
190
|
+
|
|
191
|
+
# ---- Stage 1: Setup ----
|
|
192
|
+
|
|
193
|
+
# Set up resources for one or more languages by delegating to
# ResourceManager.setup once per language.
#
# NOTE(review): every extra keyword option is forwarded unchanged to each
# language's setup call. The local-file options below are documented as
# single-language only; confirm ResourceManager.setup enforces that.
#
# @param languages [String, Symbol, Array<String, Symbol>] One or more language codes
# @param want [Array<Symbol>, nil] Resource types to fetch; nil selects
#   ResourceManager::DEFAULT_WANT
# @param force [Boolean] Re-fetch even if already cached
# @param strict [Boolean] Re-raise on optional-resource failure
# @param aff [String, nil] Path to local .aff file (single-language only)
# @param dic [String, nil] Path to local .dic file (single-language only)
# @param from [String, nil] Directory containing local .aff/.dic (single-language only)
# @param frequency [String, nil] Path to local frequency.json (single-language only)
# @return [SetupResult, Array<SetupResult>] A single result for one language,
#   an array of results (in argument order) for several
# @raise [ArgumentError] if no language is given
#
# @example Download from kotoshu/dictionaries
#   Kotoshu.setup(:en)                                   # spelling only
#   Kotoshu.setup(:en, want: %i[spelling frequency])     # spelling + frequency
#   Kotoshu.setup(:en, :de, :fr)                         # multiple languages
#
# @example Register local files (user already has hunspell dicts)
#   Kotoshu.setup(:en, aff: "/usr/share/hunspell/en_US.aff",
#                      dic: "/usr/share/hunspell/en_US.dic")
#
# @example Register local files from a directory
#   Kotoshu.setup(:en, from: "/usr/share/hunspell/")  # looks for en.aff, en.dic
def self.setup(*languages, want: nil, **opts)
  raise ArgumentError, "Kotoshu.setup requires at least one language" if languages.empty?

  wanted = want || ResourceManager::DEFAULT_WANT
  results = languages.map do |code|
    ResourceManager.setup(code, want: wanted, **opts)
  end

  # A lone language yields its bare result rather than a one-element array.
  languages.size == 1 ? results.first : results
end
|
|
227
|
+
|
|
228
|
+
# Ask ResourceManager whether a language, or one particular resource of
# that language, has been set up.
#
# @param language [String, Symbol] Language code
# @param resource [Symbol, nil] :spelling, :frequency, :model, or nil for any
# @return [Boolean] Result of ResourceManager.setup?
#
# @example
#   Kotoshu.setup(:en)
#   Kotoshu.setup?(:en)              # => true
#   Kotoshu.setup?(:en, :spelling)   # => true
#   Kotoshu.setup?(:en, :frequency)  # => false (not set up)
def self.setup?(language, resource = nil)
  ResourceManager.setup?(language, resource: resource)
end
|
|
242
|
+
|
|
243
|
+
# Languages that have been set up, as reported by
# ResourceManager.languages_setup.
#
# @return [Array<String>] Language codes with cached spelling resources
#
# @example
#   Kotoshu.languages_setup  # => ["de", "en", "fr"]
def self.languages_setup
  ResourceManager.languages_setup
end
|
|
252
|
+
|
|
253
|
+
# Drop every memoized spellchecker (the default one and the per-language
# ones). Nothing is rebuilt here: the next `spellchecker` or
# `spellchecker_for` call resolves again from the current configuration.
#
# Because no resolution happens, this is safe to call between tests even
# when no language has been set up yet.
#
# @return [nil]
def self.reset_spellchecker
  @spellchecker = @spellcheckers = nil
end
|
|
265
|
+
|
|
266
|
+
# Whether a word is spelled correctly.
#
# Uses the per-language spellchecker when `language` is given, otherwise the
# default spellchecker.
#
# @param word [String] The word to check
# @param language [String, Symbol, nil] Language code; if nil, uses configured default
# @return [Boolean] True if the word is correct
# @raise [ResourceNotSetupError] if the language hasn't been set up
#
# @example
#   Kotoshu.setup(:en)
#   Kotoshu.correct?("hello")                  # => true
#   Kotoshu.correct?("Hallo", language: "de")  # requires Kotoshu.setup(:de) first
def self.correct?(word, language: nil)
  return spellchecker.correct?(word) unless language

  spellchecker_for(language).correct?(word)
end
|
|
282
|
+
|
|
283
|
+
# Whether a word is misspelled; the logical negation of `correct?`.
#
# @param word [String] The word to check
# @param language [String, Symbol, nil] Language code; if nil, uses configured default
# @return [Boolean] True if the word is misspelled
# @raise [ResourceNotSetupError] if the language hasn't been set up
def self.misspelled?(word, language: nil)
  known = correct?(word, language: language)
  !known
end
|
|
292
|
+
|
|
293
|
+
# Spelling suggestions for a word.
#
# Uses the per-language spellchecker when `language` is given, otherwise the
# default spellchecker. Remaining keyword options are passed through to the
# spellchecker's `suggest`.
#
# @param word [String] The misspelled word
# @param language [String, Symbol, nil] Language code; if nil, uses configured default
# @param options [Hash] Options forwarded to the spellchecker (max_suggestions, etc.)
# @return [Suggestions::SuggestionSet] Generated suggestions
# @raise [ResourceNotSetupError] if the language hasn't been set up
#
# @example
#   Kotoshu.setup(:en)
#   suggestions = Kotoshu.suggest("helo")
#   suggestions.to_words  # => ["hello", "help", "held", ...]
def self.suggest(word, language: nil, **options)
  return spellchecker.suggest(word, **options) unless language

  spellchecker_for(language).suggest(word, **options)
end
|
|
309
|
+
|
|
310
|
+
# Spell-check a piece of text.
#
# Uses the per-language spellchecker when `language` is given, otherwise the
# default spellchecker.
#
# @param text [String] The text to check
# @param language [String, Symbol, nil] Language code; if nil, uses configured default
# @param _options [Hash] Accepted for call-site compatibility but currently
#   ignored; nothing is forwarded to the spellchecker
# @return [Models::Result::DocumentResult] The check result
# @raise [ResourceNotSetupError] if the language hasn't been set up
#
# @example
#   Kotoshu.setup(:en)
#   result = Kotoshu.check("Hello wrold")
#   result.errors.map(&:word)  # => ["wrold"]
def self.check(text, language: nil, **_options)
  return spellchecker.check(text) unless language

  spellchecker_for(language).check(text)
end
|
|
326
|
+
|
|
327
|
+
# Spell-check the contents of a file.
#
# Uses the per-language spellchecker when `language` is given, otherwise the
# default spellchecker.
#
# @param path [String] The file path
# @param language [String, Symbol, nil] Language code; if nil, uses configured default
# @param _options [Hash] Accepted for call-site compatibility but currently
#   ignored; nothing is forwarded to the spellchecker
# @return [Models::Result::DocumentResult] The check result
# @raise [ResourceNotSetupError] if the language hasn't been set up
#
# @example
#   Kotoshu.setup(:en)
#   result = Kotoshu.check_file("README.md")
#   result.success?  # => false
def self.check_file(path, language: nil, **_options)
  return spellchecker.check_file(path) unless language

  spellchecker_for(language).check_file(path)
end
|
|
343
|
+
|
|
344
|
+
# Spell-check several files, one `check_file` call per path.
#
# @param paths [Array<String>] The file paths
# @param options [Hash] Keyword options passed to every `check_file` call
#   (e.g. `language:`)
# @return [Array<Models::Result::DocumentResult>] Results in the order of `paths`
#
# @example
#   results = Kotoshu.check_files(%w[README.md CHANGELOG.md])
#   results.select(&:failed?)
def self.check_files(paths, **options)
  paths.map do |file_path|
    check_file(file_path, **options)
  end
end
|
|
356
|
+
|
|
357
|
+
# Convenience constructor for an indexed dictionary.
#
# * Array  - built from the given words
# * String - treated as a file path and loaded with `from_file`
# * nil    - empty dictionary
# * Hash   - also yields an empty dictionary; the hash contents are not used
#
# @param source [Array<String>, String, Hash, nil] Words, a file path, or nothing
# @return [Core::IndexedDictionary] New dictionary
# @raise [ArgumentError] if `source` is of any other type
def self.dictionary(source = nil)
  case source
  when nil, Hash then Core::IndexedDictionary.new
  when Array     then Core::IndexedDictionary.new(source)
  when String    then Core::IndexedDictionary.from_file(source)
  else
    raise ArgumentError, "Invalid dictionary source: #{source.inspect}"
  end
end
|
|
373
|
+
|
|
374
|
+
# Convenience constructor for a trie.
#
# * Array  - built from the given words
# * String - if it names an existing regular file the trie is loaded from
#            that file; otherwise the string itself is handed to
#            `Builder.from_string`
# * nil    - empty trie
#
# @param source [Array<String>, String, nil] Words, a file path, or word text
# @return [Core::Trie::Trie] New trie
# @raise [ArgumentError] if `source` is of any other type
def self.trie(source = nil)
  case source
  when Array
    Core::Trie::Builder.from_array(source)
  when String
    # File.file? rather than File.exist?: a string that merely coincides with
    # a directory name (e.g. "lib" or "tmp") must not be opened as a file.
    if File.file?(source)
      Core::Trie::Builder.from_file(source)
    else
      Core::Trie::Builder.from_string(source)
    end
  when nil
    Core::Trie::Trie.new
  else
    raise ArgumentError, "Invalid trie source: #{source.inspect}"
  end
end
|
|
394
|
+
|
|
395
|
+
# Convenience constructor for a suggestion pipeline.
#
# Creates a composite strategy named :default and adds each given strategy
# to it in argument order.
#
# @param strategies [Array] Optional strategies to add
# @return [Suggestions::Strategies::CompositeStrategy] New pipeline
def self.suggestion_pipeline(*strategies)
  composite = Suggestions::Strategies::CompositeStrategy.new(name: :default)
  strategies.each_with_object(composite) do |strategy, pipeline|
    pipeline.add(strategy)
  end
end
|
|
404
|
+
|
|
405
|
+
# Register a custom dictionary backend under a type key.
#
# Delegates to `Dictionary.register_type`.
#
# @param type [Symbol] The type key
# @param klass [Class] The dictionary class
#
# @example
#   Kotoshu.register_dictionary_type(:my_custom, MyDictionary)
def self.register_dictionary_type(type, klass)
  Dictionary.register_type(type, klass)
end
|
|
415
|
+
|
|
416
|
+
# Register a custom suggestion strategy under a name.
#
# Delegates to `Suggestions::Strategies::BaseStrategy.register_type`.
#
# @param name [Symbol] The algorithm name
# @param klass [Class] The algorithm class
#
# @example
#   Kotoshu.register_suggestion_algorithm(:my_custom, MyStrategy)
def self.register_suggestion_algorithm(name, klass)
  Suggestions::Strategies::BaseStrategy.register_type(name, klass)
end
|
|
426
|
+
|
|
427
|
+
# Shortcut to the Language module itself.
#
# @return [Module] The Language module
#
# @example
#   Kotoshu.language.detect("Hello world")  # same as Kotoshu::Language.detect
def self.language
  Language
end
|
|
436
|
+
|
|
437
|
+
# Detect the language of a text by delegating to `Language.detect`.
#
# @param text [String] Text to analyze
# @return [String, nil] Detected language code
#
# @example
#   Kotoshu.detect_language("Bonjour le monde")  # => "fr"
#   Kotoshu.detect_language("こんにちは")         # => "ja"
def self.detect_language(text)
  Language.detect(text)
end
|
|
448
|
+
|
|
449
|
+
# Detect the language of a text together with a confidence score, by
# delegating to `Language.detect_with_confidence`.
#
# @param text [String] Text to analyze
# @return [Array<String, Float>] Language code and confidence
#
# @example
#   lang, conf = Kotoshu.detect_language_with_confidence("Hello world")
#   lang  # => "en"
#   conf  # => 0.85
def self.detect_language_with_confidence(text)
  Language.detect_with_confidence(text)
end
|
|
461
|
+
|
|
462
|
+
# Look up a language class by its code via `Language.get`.
#
# @param code [String] Language code (e.g., "en-US", "de-DE")
# @return [Class, nil] Language class or nil
#
# @example
#   Kotoshu.get_language("en-US")
def self.get_language(code)
  Language.get(code)
end
|
|
472
|
+
|
|
473
|
+
# Whether a language code is registered, as reported by
# `Language.registered?`.
#
# @param code [String] Language code
# @return [Boolean] True if registered
#
# @example
#   Kotoshu.language_registered?("en-US")  # => true or false
def self.language_registered?(code)
  Language.registered?(code)
end
|
|
483
|
+
|
|
484
|
+
# All supported language codes, as reported by `Language.supported_codes`.
#
# @return [Array<String>] List of language codes
#
# @example
#   Kotoshu.supported_languages  # => ["de-DE", "en-US", "fr-FR", ...]
def self.supported_languages
  Language.supported_codes
end
|
|
493
|
+
end
|