kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,239 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 5: Suggestion Algorithms
5
+ #
6
+ # This example demonstrates how to use different suggestion algorithms
7
+ # and build custom suggestion pipelines.
8
+
9
+ require_relative "../lib/kotoshu"
10
+
11
+ puts "=== Example 5: Suggestion Algorithms ==="
12
+ puts
13
+
14
+ # Create a test dictionary
15
+ test_words = %w[
16
+ hello help held heap world
17
+ test text toast tost
18
+ run running runner
19
+ code coding coded
20
+ write writing writer
21
+ speak speaking speaker
22
+ read reading reader
23
+ walk walking walker
24
+ talk talking talker
25
+ ]
26
+
27
+ test_dict = Kotoshu::Dictionary::Custom.new(
28
+ words: test_words,
29
+ language_code: "en"
30
+ )
31
+
32
+ puts "Test dictionary: #{test_dict.size} words"
33
+ puts
34
+
35
+ # Example 1: Edit Distance Strategy
36
+ puts "1. Edit Distance Strategy"
37
+ puts "-" * 40
38
+
39
+ edit_strategy = Kotoshu::Suggestions::Strategies::EditDistanceStrategy.new
40
+ context = Kotoshu::Suggestions::Context.new(
41
+ word: "helo",
42
+ dictionary: test_dict,
43
+ max_results: 5
44
+ )
45
+
46
+ result = edit_strategy.generate(context)
47
+ puts "Suggestions for 'helo':"
48
+ puts " #{result.to_words.join(", ")}"
49
+ puts " Details:"
50
+ result.each do |sugg|
51
+ puts " • #{sugg.word} (distance: #{sugg.distance}, confidence: #{sugg.confidence.round(2)})"
52
+ end
53
+
54
+ puts
55
+ puts "=" * 40
56
+ puts
57
+
58
+ # Example 2: Phonetic Strategy (Soundex)
59
+ puts "2. Phonetic Strategy (Soundex)"
60
+ puts "-" * 40
61
+
62
+ phonetic_strategy = Kotoshu::Suggestions::Strategies::PhoneticStrategy.new(
63
+ algorithm: :soundex
64
+ )
65
+
66
+ context2 = Kotoshu::Suggestions::Context.new(
67
+ word: "hel",
68
+ dictionary: test_dict,
69
+ max_results: 5
70
+ )
71
+
72
+ result2 = phonetic_strategy.generate(context2)
73
+ puts "Suggestions for 'hel' (Soundex):"
74
+ puts " #{result2.to_words.join(", ")}"
75
+
76
+ # Show Soundex codes
77
+ puts "\nSoundex codes:"
78
+ puts " 'hel' -> #{phonetic_strategy.send(:soundex_code, "hel")}"
79
+ test_words.each do |word|
80
+ code = phonetic_strategy.send(:soundex_code, word)
81
+ puts " '#{word}' -> #{code}"
82
+ end
83
+
84
+ puts
85
+ puts "=" * 40
86
+ puts
87
+
88
+ # Example 3: Phonetic Strategy (Metaphone)
89
+ puts "3. Phonetic Strategy (Metaphone)"
90
+ puts "-" * 40
91
+
92
+ metaphone_strategy = Kotoshu::Suggestions::Strategies::PhoneticStrategy.new(
93
+ algorithm: :metaphone
94
+ )
95
+
96
+ context3 = Kotoshu::Suggestions::Context.new(
97
+ word: "fnix", # Should suggest "Phoenix"
98
+ dictionary: test_dict,
99
+ max_results: 5
100
+ )
101
+
102
+ # Add "phoenix" to the dictionary before generating suggestions (context3 was built with this same test_dict object)
103
+ test_dict.add_word("phoenix")
104
+
105
+ result3 = metaphone_strategy.generate(context3)
106
+ puts "Suggestions for 'fnix' (Metaphone):"
107
+ puts " #{result3.to_words.join(", ")}"
108
+
109
+ puts "\nMetaphone codes:"
110
+ puts " 'fnix' -> #{metaphone_strategy.send(:metaphone_code, "fnix")}"
111
+ puts " 'phoenix' -> #{metaphone_strategy.send(:metaphone_code, "phoenix")}"
112
+ puts " 'finish' -> #{metaphone_strategy.send(:metaphone_code, "finish")}"
113
+
114
+ puts
115
+ puts "=" * 40
116
+ puts
117
+
118
+ # Example 4: N-Gram Strategy
119
+ puts "4. N-Gram Strategy"
120
+ puts "-" * 40
121
+
122
+ ngram_strategy = Kotoshu::Suggestions::Strategies::NgramStrategy.new(
123
+ n: 2,
124
+ min_similarity: 0.2
125
+ )
126
+
127
+ context4 = Kotoshu::Suggestions::Context.new(
128
+ word: "tsting", # Should suggest "testing"
129
+ dictionary: test_dict,
130
+ max_results: 5
131
+ )
132
+
133
+ # Add "testing" to the dictionary before generating suggestions (context4 was built with this same test_dict object)
134
+ test_dict.add_word("testing")
135
+
136
+ result4 = ngram_strategy.generate(context4)
137
+ puts "Suggestions for 'tsting' (N-Gram, n=2):"
138
+ puts " #{result4.to_words.join(", ")}"
139
+
140
+ puts
141
+ puts "=" * 40
142
+ puts
143
+
144
+ # Example 5: Composite Strategy (Pipeline)
145
+ puts "5. Composite Strategy (Pipeline)"
146
+ puts "-" * 40
147
+
148
+ # Build a pipeline with multiple strategies
149
+ pipeline = Kotoshu.suggestion_pipeline(
150
+ Kotoshu::Suggestions::Strategies::EditDistanceStrategy.new,
151
+ Kotoshu::Suggestions::Strategies::PhoneticStrategy.new,
152
+ Kotoshu::Suggestions::Strategies::NgramStrategy.new(n: 2)
153
+ )
154
+
155
+ context5 = Kotoshu::Suggestions::Context.new(
156
+ word: "wrld",
157
+ dictionary: test_dict,
158
+ max_results: 10
159
+ )
160
+
161
+ result5 = pipeline.generate(context5)
162
+ puts "Suggestions for 'wrld' (Composite Pipeline):"
163
+ puts " #{result5.to_words.join(", ")}"
164
+
165
+ puts
166
+ puts "Breakdown by source:"
167
+ result5.from_source(:edit_distance).each do |sugg|
168
+ puts " EditDistance: #{sugg.word} (distance: #{sugg.distance})"
169
+ end
170
+ result5.from_source(:phonetic).each do |sugg|
171
+ puts " Phonetic: #{sugg.word}"
172
+ end
173
+ result5.from_source(:ngram).each do |sugg|
174
+ puts " N-Gram: #{sugg.word}"
175
+ end
176
+
177
+ puts
178
+ puts "=" * 40
179
+ puts
180
+
181
+ # Example 6: Custom Strategy
182
+ puts "6. Custom Strategy"
183
+ puts "-" * 40
184
+
185
+ class PrefixStrategy < Kotoshu::Suggestions::Strategies::BaseStrategy # Example custom strategy: suggests dictionary words that begin with the same leading characters as the input word.
186
+ def generate(context) # Reads the input via context.word and returns whatever create_suggestion_set builds from the matching words.
187
+ word = context.word
188
+ dict_words = dictionary_words(context) # dictionary_words is not defined in this file; presumably inherited from BaseStrategy - confirm.
189
+
190
+ # The prefix is all but the last character of the word, but never shorter than 3 characters; words of 3 or fewer characters are used whole.
191
+ prefix_len = [word.length - 1, 3].max
192
+ prefix = word[0...prefix_len]
193
+
194
+ candidates = dict_words.select { |w| w.start_with?(prefix) && w != word } # Keep prefix matches, excluding the input word itself.
195
+ create_suggestion_set(candidates) # Also not defined in this file; presumably a BaseStrategy helper - confirm.
196
+ end
197
+ end
198
+
199
+ prefix_strategy = PrefixStrategy.new(name: :prefix)
200
+
201
+ context6 = Kotoshu::Suggestions::Context.new(
202
+ word: "hel", # Incomplete word
203
+ dictionary: test_dict,
204
+ max_results: 10
205
+ )
206
+
207
+ result6 = prefix_strategy.generate(context6)
208
+ puts "Suggestions for 'hel' (Prefix-based):"
209
+ puts " #{result6.to_words.join(", ")}"
210
+
211
+ puts
212
+ puts "=" * 40
213
+ puts
214
+
215
+ # Example 7: Suggestion Generator
216
+ puts "7. Suggestion Generator (High-level API)"
217
+ puts "-" * 40
218
+
219
+ generator = Kotoshu::Suggestions::Generator.new(
220
+ test_dict,
221
+ max_suggestions: 10,
222
+ algorithms: [
223
+ Kotoshu::Suggestions::Strategies::EditDistanceStrategy,
224
+ Kotoshu::Suggestions::Strategies::PhoneticStrategy
225
+ ]
226
+ )
227
+
228
+ puts "Generator configured with:"
229
+ puts " Dictionary: #{test_dict.size} words"
230
+ puts " Max suggestions: 10"
231
+ puts " Algorithms: EditDistanceStrategy, PhoneticStrategy"
232
+ puts
233
+
234
+ test_words = %w[helo wrld tsting fnix]
235
+ test_words.each do |word|
236
+ suggestions = generator.suggest(word)
237
+ puts "Suggestions for '#{word}':"
238
+ puts " #{suggestions.to_words.join(", ")}"
239
+ end
@@ -0,0 +1,287 @@
1
+ #!/usr/bin/env ruby
2
+ # frozen_string_literal: true
3
+
4
+ # Example 6: Configuration and Advanced Usage
5
+ #
6
+ # This example demonstrates how to configure Kotoshu and use
7
+ # advanced features like custom words, multiple languages, etc.
8
+
9
+ require_relative "../lib/kotoshu"
10
+
11
+ puts "=== Example 6: Configuration and Advanced Usage ==="
12
+ puts
13
+
14
+ # Example 1: Global Configuration
15
+ puts "1. Global Configuration"
16
+ puts "-" * 40
17
+
18
+ # Prefer the system word list, then a project-local copy; otherwise leave the path nil
19
+ dict_path = if File.exist?("/usr/share/dict/words")
20
+ "/usr/share/dict/words"
21
+ elsif File.exist?("dictionaries/unix_words/words")
22
+ "dictionaries/unix_words/words"
23
+ else
24
+ # Will use auto-detected system dictionary
25
+ nil
26
+ end
27
+
28
+ Kotoshu.configure do |config|
29
+ config.dictionary_type = :unix_words
30
+ config.dictionary_path = dict_path
31
+ config.language = "en-US"
32
+ config.max_suggestions = 15
33
+ config.case_sensitive = false
34
+ config.custom_words = %w[Kotoshu spellcheck]
35
+ end
36
+
37
+ puts "Configuration:"
38
+ config = Kotoshu.configuration
39
+ puts " Dictionary type: #{config.dictionary_type}"
40
+ puts " Dictionary path: #{config.dictionary_path}"
41
+ puts " Language: #{config.language}"
42
+ puts " Max suggestions: #{config.max_suggestions}"
43
+ puts " Case sensitive: #{config.case_sensitive}"
44
+ puts " Custom words: #{config.custom_words.inspect}"
45
+ puts
46
+
47
+ # Use the configured spellchecker
48
+ puts "Using configured spellchecker:"
49
+ puts " Has 'hello': #{Kotoshu.correct?("hello")}"
50
+ puts " Has 'Kotoshu': #{Kotoshu.correct?("Kotoshu")}"
51
+ suggestions = Kotoshu.suggest("helo")
52
+ puts " Suggestions for 'helo': #{suggestions.to_words.first(10).join(", ")}..."
53
+
54
+ puts
55
+ puts "=" * 40
56
+ puts
57
+
58
+ # Example 2: Spellchecker Instance with Custom Configuration
59
+ puts "2. Custom Spellchecker Instance"
60
+ puts "-" * 40
61
+
62
+ # Create a custom dictionary
63
+ custom_dict = Kotoshu::Dictionary::Custom.new(
64
+ words: %w[ruby gem rspec rake bundler],
65
+ language_code: "en"
66
+ )
67
+
68
+ # Create a spellchecker with the custom dictionary
69
+ custom_spellchecker = Kotoshu::Spellchecker.new(dictionary: custom_dict)
70
+
71
+ puts "Custom spellchecker with Ruby-related words:"
72
+ puts " Has 'ruby': #{custom_spellchecker.correct?("ruby")}"
73
+ puts " Has 'gem': #{custom_spellchecker.correct?("gem")}"
74
+ puts " Has 'rake': #{custom_spellchecker.correct?("rake")}"
75
+ puts " Has 'python': #{custom_spellchecker.correct?("python")}"
76
+ puts " Suggestions for 'rke': #{custom_spellchecker.suggest("rke").to_words.join(", ")}"
77
+
78
+ puts
79
+ puts "=" * 40
80
+ puts
81
+
82
+ # Example 3: Dictionary Repository
83
+ puts "3. Dictionary Repository"
84
+ puts "-" * 40
85
+
86
+ repo = Kotoshu::Dictionary::Repository.new
87
+
88
+ # Register multiple dictionaries
89
+ repo.register(:en_US, custom_dict)
90
+ repo.register(:programming, Kotoshu::Dictionary::PlainText.from_words(
91
+ %w[code function variable class module],
92
+ language_code: "en"
93
+ ))
94
+ repo.register(:tech, Kotoshu::Dictionary::PlainText.from_words(
95
+ %w[computer software hardware internet api],
96
+ language_code: "en"
97
+ ))
98
+
99
+ puts "Registered dictionaries:"
100
+ repo.each_key do |key|
101
+ dict = repo.get(key)
102
+ puts " #{key}: #{dict.size} words (#{dict.type})"
103
+ end
104
+
105
+ puts "\nFind by language 'en':"
106
+ found = repo.find_by_language("en")
107
+ found.each do |dict|
108
+ puts " #{dict.type}: #{dict.size} words"
109
+ end
110
+
111
+ puts
112
+ puts "=" * 40
113
+ puts
114
+
115
+ # Example 4: IndexedDictionary
116
+ puts "4. IndexedDictionary (Rich Query Interface)"
117
+ puts "-" * 40
118
+
119
+ index_dict = Kotoshu.dictionary(%w[
120
+ hello help held heap
121
+ world work word
122
+ test text toast
123
+ run running runner
124
+ code coding coded
125
+ ])
126
+
127
+ puts "IndexedDictionary: #{index_dict.size} words"
128
+ puts
129
+
130
+ puts "Query methods:"
131
+ puts " Words starting with 'he': #{index_dict.find_by_prefix("he").inspect}"
132
+ puts " Words ending with 'ld': #{index_dict.find_by_suffix("ld").inspect}"
133
+ puts " Words with length 3: #{index_dict.find_by_length(3).inspect}"
134
+ puts " Words matching pattern 't.*t': #{index_dict.find_by_pattern(/t.*t/).inspect}"
135
+ puts
136
+
137
+ puts "Statistics:"
138
+ stats = index_dict.statistics
139
+ stats.each do |key, value|
140
+ puts " #{key}: #{value}"
141
+ end
142
+
143
+ puts
144
+ puts "=" * 40
145
+ puts
146
+
147
+ # Example 5: WordResult and DocumentResult
148
+ puts "5. Result Objects"
149
+ puts "-" * 40
150
+
151
+ # Check a word
152
+ word_result = Kotoshu.spellchecker.check_word("hello")
153
+ puts "WordResult for 'hello':"
154
+ puts " Word: #{word_result.word}"
155
+ puts " Correct: #{word_result.correct?}"
156
+ puts " Has suggestions: #{word_result.has_suggestions?}"
157
+ puts
158
+
159
+ word_result2 = Kotoshu.spellchecker.check_word("helo")
160
+ puts "WordResult for 'helo':"
161
+ puts " Word: #{word_result2.word}"
162
+ puts " Correct: #{word_result2.correct?}"
163
+ puts " Suggestion count: #{word_result2.suggestion_count}"
164
+ puts " First suggestion: #{word_result2.first_suggestion}"
165
+ puts " Top 3: #{word_result2.top_suggestions(3).join(", ")}"
166
+ puts
167
+
168
+ # Check text
169
+ text_result = Kotoshu.spellchecker.check("Hello wrold! This is a tst.")
170
+ puts "DocumentResult:"
171
+ puts " Success: #{text_result.success?}"
172
+ puts " Word count: #{text_result.word_count}"
173
+ puts " Error count: #{text_result.error_count}"
174
+ puts " Unique errors: #{text_result.unique_error_count}"
175
+ puts
176
+ puts " Errors:"
177
+ text_result.errors.each do |error|
178
+ suggestions_str = if error.has_suggestions?
179
+ " (suggestions: #{error.top_suggestions(2).join(", ")})"
180
+ else
181
+ ""
182
+ end
183
+ puts " • #{error.word} at position #{error.position}#{suggestions_str}"
184
+ end
185
+
186
+ puts
187
+ puts "=" * 40
188
+ puts
189
+
190
+ # Example 6: Multiple File Checking
191
+ puts "6. Batch File Checking"
192
+ puts "-" * 40
193
+
194
+ # Check multiple files
195
+ fixtures_dir = "spec/fixtures/documents"
196
+ if Dir.exist?(fixtures_dir)
197
+ files = Dir.glob(File.join(fixtures_dir, "*.txt"))
198
+ puts "Checking #{files.size} files..."
199
+ puts
200
+
201
+ files.each do |file|
202
+ result = Kotoshu.check_file(file)
203
+ status = result.success? ? "✓" : "✗"
204
+ filename = File.basename(file)
205
+ puts "#{status} #{filename}: #{result.error_count} error(s), #{result.word_count} words"
206
+ end
207
+
208
+ puts
209
+
210
+ # Get all results at once
211
+ results = Kotoshu.check_files(files)
212
+ total_errors = results.sum(&:error_count)
213
+ total_words = results.sum(&:word_count)
214
+ failed_count = results.count(&:failed?)
215
+
216
+ puts "Summary:"
217
+ puts " Files checked: #{files.size}"
218
+ puts " Files with errors: #{failed_count}"
219
+ puts " Total errors: #{total_errors}"
220
+ puts " Total words: #{total_words}"
221
+ end
222
+
223
+ puts
224
+ puts "=" * 40
225
+ puts
226
+
227
+ # Example 7: Error Handling
228
+ puts "7. Error Handling"
229
+ puts "-" * 40
230
+
231
+ begin
232
+ # Try to load a non-existent dictionary
233
+ bad_config = Kotoshu::Configuration.new(
234
+ dictionary_type: :plain_text,
235
+ dictionary_path: "/nonexistent/path.txt"
236
+ )
237
+ bad_config.load_dictionary
238
+ rescue Kotoshu::DictionaryNotFoundError => e
239
+ puts "Caught DictionaryNotFoundError:"
240
+ puts " Message: #{e.message}"
241
+ puts " Path: #{e.path}"
242
+ end
243
+
244
+ puts
245
+
246
+ begin
247
+ # Try to use an invalid dictionary type
248
+ bad_config2 = Kotoshu::Configuration.new(
249
+ dictionary_type: :invalid_type
250
+ )
251
+ bad_config2.load_dictionary
252
+ rescue Kotoshu::ConfigurationError => e
253
+ puts "Caught ConfigurationError:"
254
+ puts " Message: #{e.message}"
255
+ puts " Key: #{e.key.inspect}"
256
+ end
257
+
258
+ puts
259
+ puts "=" * 40
260
+ puts
261
+
262
+ # Example 8: Thread Safety (no threads are started here; this shows that each spellchecker instance has its own dictionary)
263
+ puts "8. Thread Safety"
264
+ puts "-" * 40
265
+
266
+ # Create two independent spellcheckers
267
+ spell1 = Kotoshu::Spellchecker.new(
268
+ dictionary: Kotoshu::Dictionary::Custom.new(
269
+ words: %w[hello world],
270
+ language_code: "en"
271
+ )
272
+ )
273
+
274
+ spell2 = Kotoshu::Spellchecker.new(
275
+ dictionary: Kotoshu::Dictionary::Custom.new(
276
+ words: %w[ruby python],
277
+ language_code: "en"
278
+ )
279
+ )
280
+
281
+ puts "Spellchecker 1 words: #{spell1.dictionary.words.inspect}"
282
+ puts "Spellchecker 2 words: #{spell2.dictionary.words.inspect}"
283
+ puts
284
+ puts "Spellchecker 1 has 'hello': #{spell1.correct?("hello")}"
285
+ puts "Spellchecker 1 has 'ruby': #{spell1.correct?("ruby")}"
286
+ puts "Spellchecker 2 has 'hello': #{spell2.correct?("hello")}"
287
+ puts "Spellchecker 2 has 'ruby': #{spell2.correct?("ruby")}"