kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
data/lib/kotoshu.rb ADDED
@@ -0,0 +1,493 @@
1
+ # frozen_string_literal: true
2
+
3
+ # EAGER: Core infrastructure
4
+ require_relative "kotoshu/version"
5
+ require_relative "kotoshu/core"
6
+ require_relative "kotoshu/core/models/word"
7
+ require_relative "kotoshu/core/models/affix_rule"
8
+ require_relative "kotoshu/core/models/result/word_result"
9
+ require_relative "kotoshu/core/models/result/document_result"
10
+
11
+ # EAGER: String metrics (used by algorithms)
12
+ require_relative "kotoshu/string_metrics"
13
+
14
+ # EAGER: Algorithms namespace
15
+ require_relative "kotoshu/algorithms"
16
+
17
+ # EAGER: Algorithms (ported from Spylls)
18
+ require_relative "kotoshu/algorithms/ngram_suggest"
19
+ require_relative "kotoshu/suggestions/suggestion"
20
+ require_relative "kotoshu/suggestions/suggestion_set"
21
+ require_relative "kotoshu/suggestions/context"
22
+ require_relative "kotoshu/suggestions/generator"
23
+
24
+ # EAGER: Dictionary base
25
+ require_relative "kotoshu/dictionary/base"
26
+ require_relative "kotoshu/dictionary/repository"
27
+
28
+ # EAGER: Dictionary backends (load all for now, can optimize later)
29
+ require_relative "kotoshu/dictionary/unix_words"
30
+ require_relative "kotoshu/dictionary/plain_text"
31
+ require_relative "kotoshu/dictionary/custom"
32
+ require_relative "kotoshu/dictionary/hunspell"
33
+ require_relative "kotoshu/dictionary/cspell"
34
+
35
+ # EAGER: Language module (multi-language support)
36
+ require_relative "kotoshu/language"
37
+
38
+ # EAGER: Strategy base
39
+ require_relative "kotoshu/suggestions/strategies/base_strategy"
40
+
41
+ # EAGER: Strategies (load all for now, can optimize later)
42
+ require_relative "kotoshu/suggestions/strategies/edit_distance_strategy"
43
+ require_relative "kotoshu/suggestions/strategies/symspell_strategy"
44
+ require_relative "kotoshu/suggestions/strategies/phonetic_strategy"
45
+ require_relative "kotoshu/suggestions/strategies/keyboard_proximity_strategy"
46
+ require_relative "kotoshu/suggestions/strategies/ngram_strategy"
47
+ require_relative "kotoshu/suggestions/strategies/composite_strategy"
48
+
49
+ # EAGER: Readers for Hunspell files
50
+ require_relative "kotoshu/readers"
51
+
52
+ # EAGER: Configuration and main interface
53
+ require_relative "kotoshu/dictionaries/catalog"
54
+ require_relative "kotoshu/configuration"
55
+ require_relative "kotoshu/spellchecker"
56
+
57
module Kotoshu
  # Semantic/embedding model classes, registered lazily on Kotoshu::Models.
  # The Models namespace itself must already exist at this point; it is
  # opened eagerly by the core/models/*.rb files required above.
  {
    Context: "kotoshu/models/context",
    EmbeddingModel: "kotoshu/models/embedding_model",
    FastTextModel: "kotoshu/models/fasttext_model",
    NearestNeighbor: "kotoshu/models/nearest_neighbor",
    OnnxModel: "kotoshu/models/onnx_model",
    SemanticError: "kotoshu/models/semantic_error",
    Suggestion: "kotoshu/models/suggestion",
    WordEmbedding: "kotoshu/models/word_embedding"
  }.each { |const_name, feature| Models.autoload(const_name, feature) }

  # Constants registered lazily on Kotoshu itself. Each file is only loaded
  # the first time its constant is referenced.
  {
    # Trie components
    # NOTE(review): Kotoshu.trie refers to Core::Trie::Trie and
    # Core::Trie::Builder; confirm these files really define
    # Kotoshu::TrieNode, Kotoshu::Trie and Kotoshu::TrieBuilder.
    TrieNode: "kotoshu/core/trie/node",
    Trie: "kotoshu/core/trie/trie",
    TrieBuilder: "kotoshu/core/trie/builder",

    # Features
    Defaults: "kotoshu/defaults",
    PersonalDictionary: "kotoshu/personal_dictionary",
    ProjectConfig: "kotoshu/project_config",
    FluentChecker: "kotoshu/fluent_checker",
    ResourceManager: "kotoshu/resource_manager",
    ResourceBundle: "kotoshu/resource_bundle",
    SourceRegistry: "kotoshu/source_registry",

    # Integrity verification
    Integrity: "kotoshu/integrity",

    # FastText integration
    SemanticAnalyzer: "kotoshu/analyzers/semantic_analyzer",

    # Document abstraction
    Location: "kotoshu/documents/location",
    Document: "kotoshu/documents/document",
    PlainTextDocument: "kotoshu/documents/plain_text_document",
    MarkdownDocument: "kotoshu/documents/markdown_document",
    AsciidocDocument: "kotoshu/documents/asciidoc_document",

    # Cache management
    LanguageCache: "kotoshu/cache/language_cache",
    ModelCache: "kotoshu/cache/model_cache",

    # Language detection
    LanguageIdentifier: "kotoshu/language/identifier",

    # Development tools
    Debug: "kotoshu/debug_mode",
    DebugLogger: "kotoshu/debug_logger",
    Metrics: "kotoshu/metrics_module",
    MetricsCollector: "kotoshu/metrics_collector"
  }.each { |const_name, feature| autoload(const_name, feature) }
end
109
+
110
module Kotoshu
  # Base class for Kotoshu-specific errors.
  class Error < StandardError; end

  autoload :Paths, "kotoshu/paths"

  # Yield the global configuration to the given block (if any) and return it.
  #
  # @yieldparam config [Configuration] The global configuration
  # @return [Configuration] The global configuration
  #
  # @example
  #   Kotoshu.configure do |config|
  #     config.dictionary_path = "/usr/share/dict/words"
  #     config.language = "en-US"
  #   end
  def self.configure
    yield configuration if block_given?
    configuration
  end

  # Get the global configuration.
  #
  # @return [Configuration] The global configuration
  #
  # @example
  #   config = Kotoshu.configuration
  def self.configuration
    Configuration.instance
  end

  # Default spellchecker (memoized). Uses the configured default language.
  # Cache-only — raises ResourceNotSetupError if the default language hasn't
  # been set up via Kotoshu.setup.
  #
  # The instance is memoized until `reset_spellchecker` is called, so a later
  # change of `configuration.default_language` is not picked up automatically.
  #
  # @return [Spellchecker] The default spellchecker
  # @raise [ResourceNotSetupError] if no default language is configured
  def self.spellchecker
    return @spellchecker if @spellchecker

    lang = configuration.default_language
    raise ResourceNotSetupError.new(lang || "default", "spelling") if lang.nil? || lang.to_s.empty?

    @spellchecker = spellchecker_for(lang)
  end

  # Get a spellchecker for a specific language (cache-only, raises on miss).
  # Instances are cached per `language.to_s`, so `:de` and `"de"` share one.
  #
  # @param language [String, Symbol] Language code (e.g., "en", "de", "fr")
  # @return [Spellchecker] Spellchecker using a ResourceManager-resolved bundle
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:de)
  #   Kotoshu.spellchecker_for("de").correct?("Hallo") # => true
  def self.spellchecker_for(language)
    key = language.to_s
    @spellcheckers ||= {}
    @spellcheckers[key] ||= begin
      bundle = ResourceManager.resolve(language: language)
      Spellchecker.new(resource_bundle: bundle, config: configuration)
    end
  end

  # Resolve language resources from the cache (no download).
  #
  # @param language [String, Symbol, nil] Language code; if nil, uses default
  # @param want [Array<Symbol>, nil] Resource types; nil means
  #   ResourceManager::DEFAULT_WANT
  # @return [ResourceBundle] Resolved bundle
  # @raise [ResourceNotSetupError] if no language is given or configured, or
  #   the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:en)
  #   bundle = Kotoshu.resolve(language: "en")
  #   bundle.dictionary # => #<Dictionary::Hunspell ...>
  def self.resolve(language: nil, want: nil)
    lang = language || configuration.default_language
    # Same guard as `spellchecker`: an empty language is as unusable as nil.
    raise ResourceNotSetupError.new(lang || "default", "spelling") if lang.nil? || lang.to_s.empty?

    ResourceManager.resolve(language: lang, want: want || ResourceManager::DEFAULT_WANT)
  end

  # ---- Stage 1: Setup ----

  # Set up resources for one or more languages (download or register local files).
  # Idempotent: re-running with the same args is a no-op unless `force: true`.
  #
  # Languages may be given as separate arguments or as an array; nested
  # arrays are flattened. All remaining keyword options are forwarded
  # unchanged to ResourceManager.setup for every language.
  # NOTE(review): that forwarding includes aff:/dic:/from:/frequency:, which
  # are documented as single-language only — confirm how ResourceManager
  # treats them when several languages are passed.
  #
  # @param languages [String, Symbol, Array<String, Symbol>] One or more language codes
  # @param want [Array<Symbol>, nil] Resource types to fetch; nil means
  #   ResourceManager::DEFAULT_WANT
  # @param force [Boolean] Re-fetch even if already cached
  # @param strict [Boolean] Re-raise on optional-resource failure
  # @param aff [String, nil] Path to local .aff file (single-language only)
  # @param dic [String, nil] Path to local .dic file (single-language only)
  # @param from [String, nil] Directory containing local .aff/.dic (single-language only)
  # @param frequency [String, nil] Path to local frequency.json (single-language only)
  # @return [SetupResult, Array<SetupResult>] Result or results (array if multiple languages)
  # @raise [ArgumentError] if no language is given
  #
  # @example Download from kotoshu/dictionaries
  #   Kotoshu.setup(:en)                                # spelling only
  #   Kotoshu.setup(:en, want: %i[spelling frequency])  # spelling + frequency
  #   Kotoshu.setup(:en, :de, :fr)                      # multiple languages
  #   Kotoshu.setup(%i[en de fr])                       # same, as an array
  #
  # @example Register local files (user already has hunspell dicts)
  #   Kotoshu.setup(:en, aff: "/usr/share/hunspell/en_US.aff",
  #                      dic: "/usr/share/hunspell/en_US.dic")
  #
  # @example Register local files from a directory
  #   Kotoshu.setup(:en, from: "/usr/share/hunspell/") # looks for en.aff, en.dic
  def self.setup(*languages, want: nil, **opts)
    # Accept both setup(:en, :de) and setup([:en, :de]).
    languages = languages.flatten
    raise ArgumentError, "Kotoshu.setup requires at least one language" if languages.empty?

    want_param = want || ResourceManager::DEFAULT_WANT
    results = languages.map { |lang| ResourceManager.setup(lang, want: want_param, **opts) }
    results.size == 1 ? results.first : results
  end

  # Check if a language (or a specific resource for that language) is set up.
  #
  # @param language [String, Symbol] Language code
  # @param resource [Symbol, nil] :spelling, :frequency, :model, or nil for any
  # @return [Boolean] True if the resource is cached and available
  #
  # @example
  #   Kotoshu.setup(:en)
  #   Kotoshu.setup?(:en)              # => true
  #   Kotoshu.setup?(:en, :spelling)   # => true
  #   Kotoshu.setup?(:en, :frequency)  # => false (not set up)
  def self.setup?(language, resource = nil)
    ResourceManager.setup?(language, resource: resource)
  end

  # List languages that have been set up.
  #
  # @return [Array<String>] Sorted array of language codes with cached spelling
  #
  # @example
  #   Kotoshu.languages_setup # => ["de", "en", "fr"]
  def self.languages_setup
    ResourceManager.languages_setup
  end

  # Reset the spellchecker cache. The next call to `spellchecker` or
  # `spellchecker_for` re-resolves from the current configuration.
  #
  # Does NOT eagerly reload — clearing the cache is enough. This makes
  # the method safe to call between tests even when no language is set
  # up yet (the next call will raise ResourceNotSetupError per the
  # strict two-stage contract).
  #
  # @return [nil]
  def self.reset_spellchecker
    @spellchecker = nil
    @spellcheckers = nil
    nil
  end

  # Check if a word is spelled correctly.
  # Hot path — cache-only, raises if language not set up.
  #
  # @param word [String] The word to check
  # @param language [String, Symbol, nil] Language code; if nil, uses configured default
  # @return [Boolean] True if the word is correct
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:en)
  #   Kotoshu.correct?("hello")                  # => true
  #   Kotoshu.correct?("Hallo", language: "de")  # requires Kotoshu.setup(:de) first
  def self.correct?(word, language: nil)
    resolve_checker(language).correct?(word)
  end

  # Check if a word is misspelled. Hot path.
  #
  # @param word [String] The word to check
  # @param language [String, Symbol, nil] Language code; if nil, uses configured default
  # @return [Boolean] True if the word is misspelled
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  def self.misspelled?(word, language: nil)
    !correct?(word, language: language)
  end

  # Get spelling suggestions for a word. Hot path.
  #
  # @param word [String] The misspelled word
  # @param language [String, Symbol, nil] Language code; if nil, uses configured default
  # @param options [Hash] Options forwarded to the spellchecker (max_suggestions, etc.)
  # @return [Suggestions::SuggestionSet] Generated suggestions
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:en)
  #   suggestions = Kotoshu.suggest("helo")
  #   suggestions.to_words # => ["hello", "help", "held", ...]
  def self.suggest(word, language: nil, **options)
    resolve_checker(language).suggest(word, **options)
  end

  # Check text for spelling errors. Hot path.
  #
  # @param text [String] The text to check
  # @param language [String, Symbol, nil] Language code; if nil, uses configured default
  # @param _options [Hash] Accepted for call compatibility; currently ignored
  # @return [Models::Result::DocumentResult] The check result
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:en)
  #   result = Kotoshu.check("Hello wrold")
  #   result.errors.map(&:word) # => ["wrold"]
  def self.check(text, language: nil, **_options)
    resolve_checker(language).check(text)
  end

  # Check a file for spelling errors. Hot path.
  #
  # @param path [String] The file path
  # @param language [String, Symbol, nil] Language code; if nil, uses configured default
  # @param _options [Hash] Accepted for call compatibility; currently ignored
  # @return [Models::Result::DocumentResult] The check result
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   Kotoshu.setup(:en)
  #   result = Kotoshu.check_file("README.md")
  #   result.success? # => false
  def self.check_file(path, language: nil, **_options)
    resolve_checker(language).check_file(path)
  end

  # Check multiple files for spelling errors.
  #
  # @param paths [Array<String>] The file paths
  # @param options [Hash] Keywords passed to `check_file` for every path;
  #   only `language:` has an effect there
  # @return [Array<Models::Result::DocumentResult>] Results for each file
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  #
  # @example
  #   results = Kotoshu.check_files(%w[README.md CHANGELOG.md])
  #   results.select(&:failed?)
  def self.check_files(paths, **options)
    paths.map { |path| check_file(path, **options) }
  end

  # Convenience method for creating an indexed dictionary.
  #
  # An Array is passed to the constructor, a String is handed to
  # `from_file`, and nil yields an empty dictionary. A Hash is accepted but
  # its contents are not used: it also yields an empty dictionary.
  #
  # @param source [Array<String>, String, Hash, nil] Words or file path
  # @return [Core::IndexedDictionary] New dictionary
  # @raise [ArgumentError] if source is of any other type
  def self.dictionary(source = nil)
    case source
    when Array
      Core::IndexedDictionary.new(source)
    when String
      Core::IndexedDictionary.from_file(source)
    when nil, Hash
      Core::IndexedDictionary.new
    else
      raise ArgumentError, "Invalid dictionary source: #{source.inspect}"
    end
  end

  # Convenience method for creating a trie.
  #
  # A String naming an existing regular file is loaded with `from_file`;
  # any other String (including the name of a directory) is handed to
  # `from_string`.
  #
  # @param source [Array<String>, String, nil] Words, file path, or word text
  # @return [Core::Trie::Trie] New trie
  # @raise [ArgumentError] if source is of any other type
  def self.trie(source = nil)
    case source
    when Array
      Core::Trie::Builder.from_array(source)
    when String
      # File.file? rather than File.exist?: a directory cannot be read as a
      # word list, so it must not take the from_file branch.
      if File.file?(source)
        Core::Trie::Builder.from_file(source)
      else
        Core::Trie::Builder.from_string(source)
      end
    when nil
      Core::Trie::Trie.new
    else
      raise ArgumentError, "Invalid trie source: #{source.inspect}"
    end
  end

  # Convenience method for creating a suggestion pipeline.
  #
  # @param strategies [Array] Optional strategies, added in the given order
  # @return [Suggestions::Strategies::CompositeStrategy] New pipeline
  def self.suggestion_pipeline(*strategies)
    pipeline = Suggestions::Strategies::CompositeStrategy.new(name: :default)
    strategies.each { |s| pipeline.add(s) }
    pipeline
  end

  # Register a custom dictionary type.
  #
  # @param type [Symbol] The type key
  # @param klass [Class] The dictionary class
  #
  # @example
  #   Kotoshu.register_dictionary_type(:my_custom, MyDictionary)
  def self.register_dictionary_type(type, klass)
    Dictionary.register_type(type, klass)
  end

  # Register a custom suggestion algorithm.
  #
  # @param name [Symbol] The algorithm name
  # @param klass [Class] The algorithm class
  #
  # @example
  #   Kotoshu.register_suggestion_algorithm(:my_custom, MyStrategy)
  def self.register_suggestion_algorithm(name, klass)
    Suggestions::Strategies::BaseStrategy.register_type(name, klass)
  end

  # Access the language module.
  #
  # @return [Module] The Language module
  #
  # @example
  #   Kotoshu.language.detect("Hello world") # => "en"
  def self.language
    Language
  end

  # Detect language of text.
  #
  # @param text [String] Text to analyze
  # @return [String, nil] Detected language code
  #
  # @example
  #   Kotoshu.detect_language("Bonjour le monde") # => "fr"
  #   Kotoshu.detect_language("こんにちは") # => "ja"
  def self.detect_language(text)
    Language.detect(text)
  end

  # Detect language with confidence score.
  #
  # @param text [String] Text to analyze
  # @return [Array<String, Float>] Language code and confidence
  #
  # @example
  #   lang, conf = Kotoshu.detect_language_with_confidence("Hello world")
  #   lang # => "en"
  #   conf # => 0.85
  def self.detect_language_with_confidence(text)
    Language.detect_with_confidence(text)
  end

  # Get language class by code.
  #
  # @param code [String] Language code (e.g., "en-US", "de-DE")
  # @return [Class, nil] Language class or nil
  #
  # @example
  #   Kotoshu.get_language("en-US")
  def self.get_language(code)
    Language.get(code)
  end

  # Check if a language is registered.
  #
  # @param code [String] Language code
  # @return [Boolean] True if registered
  #
  # @example
  #   Kotoshu.language_registered?("en-US") # => true or false
  def self.language_registered?(code)
    Language.registered?(code)
  end

  # Get all supported language codes.
  #
  # @return [Array<String>] List of language codes
  #
  # @example
  #   Kotoshu.supported_languages # => ["de-DE", "en-US", "fr-FR", ...]
  def self.supported_languages
    Language.supported_codes
  end

  # Pick the spellchecker for a hot-path call: the per-language one when a
  # language is given, otherwise the memoized default.
  #
  # @param language [String, Symbol, nil] Language code or nil for default
  # @return [Spellchecker]
  # @raise [ResourceNotSetupError] if the language hasn't been set up
  def self.resolve_checker(language)
    language ? spellchecker_for(language) : spellchecker
  end
  private_class_method :resolve_checker
end