kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,239 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example 5: Suggestion Algorithms
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how to use different suggestion algorithms
|
|
7
|
+
# and build custom suggestion pipelines.
|
|
8
|
+
|
|
9
|
+
require_relative "../lib/kotoshu"
|
|
10
|
+
|
|
11
|
+
puts "=== Example 5: Suggestion Algorithms ==="
|
|
12
|
+
puts
|
|
13
|
+
|
|
14
|
+
# Create a test dictionary
|
|
15
|
+
test_words = %w[
|
|
16
|
+
hello help held heap world
|
|
17
|
+
test text toast tost
|
|
18
|
+
run running runner
|
|
19
|
+
code coding coded
|
|
20
|
+
write writing writer
|
|
21
|
+
speak speaking speaker
|
|
22
|
+
read reading reader
|
|
23
|
+
walk walking walker
|
|
24
|
+
talk talking talker
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
test_dict = Kotoshu::Dictionary::Custom.new(
|
|
28
|
+
words: test_words,
|
|
29
|
+
language_code: "en"
|
|
30
|
+
)
|
|
31
|
+
|
|
32
|
+
puts "Test dictionary: #{test_dict.size} words"
|
|
33
|
+
puts
|
|
34
|
+
|
|
35
|
+
# Example 1: Edit Distance Strategy
|
|
36
|
+
puts "1. Edit Distance Strategy"
|
|
37
|
+
puts "-" * 40
|
|
38
|
+
|
|
39
|
+
edit_strategy = Kotoshu::Suggestions::Strategies::EditDistanceStrategy.new
|
|
40
|
+
context = Kotoshu::Suggestions::Context.new(
|
|
41
|
+
word: "helo",
|
|
42
|
+
dictionary: test_dict,
|
|
43
|
+
max_results: 5
|
|
44
|
+
)
|
|
45
|
+
|
|
46
|
+
result = edit_strategy.generate(context)
|
|
47
|
+
puts "Suggestions for 'helo':"
|
|
48
|
+
puts " #{result.to_words.join(", ")}"
|
|
49
|
+
puts " Details:"
|
|
50
|
+
result.each do |sugg|
|
|
51
|
+
puts " • #{sugg.word} (distance: #{sugg.distance}, confidence: #{sugg.confidence.round(2)})"
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
puts
|
|
55
|
+
puts "=" * 40
|
|
56
|
+
puts
|
|
57
|
+
|
|
58
|
+
# Example 2: Phonetic Strategy (Soundex)
|
|
59
|
+
puts "2. Phonetic Strategy (Soundex)"
|
|
60
|
+
puts "-" * 40
|
|
61
|
+
|
|
62
|
+
phonetic_strategy = Kotoshu::Suggestions::Strategies::PhoneticStrategy.new(
|
|
63
|
+
algorithm: :soundex
|
|
64
|
+
)
|
|
65
|
+
|
|
66
|
+
context2 = Kotoshu::Suggestions::Context.new(
|
|
67
|
+
word: "hel",
|
|
68
|
+
dictionary: test_dict,
|
|
69
|
+
max_results: 5
|
|
70
|
+
)
|
|
71
|
+
|
|
72
|
+
result2 = phonetic_strategy.generate(context2)
|
|
73
|
+
puts "Suggestions for 'hel' (Soundex):"
|
|
74
|
+
puts " #{result2.to_words.join(", ")}"
|
|
75
|
+
|
|
76
|
+
# Show Soundex codes
|
|
77
|
+
puts "\nSoundex codes:"
|
|
78
|
+
puts " 'hel' -> #{phonetic_strategy.send(:soundex_code, "hel")}"
|
|
79
|
+
test_words.each do |word|
|
|
80
|
+
code = phonetic_strategy.send(:soundex_code, word)
|
|
81
|
+
puts " '#{word}' -> #{code}"
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
puts
|
|
85
|
+
puts "=" * 40
|
|
86
|
+
puts
|
|
87
|
+
|
|
88
|
+
# Example 3: Phonetic Strategy (Metaphone)
|
|
89
|
+
puts "3. Phonetic Strategy (Metaphone)"
|
|
90
|
+
puts "-" * 40
|
|
91
|
+
|
|
92
|
+
metaphone_strategy = Kotoshu::Suggestions::Strategies::PhoneticStrategy.new(
|
|
93
|
+
algorithm: :metaphone
|
|
94
|
+
)
|
|
95
|
+
|
|
96
|
+
context3 = Kotoshu::Suggestions::Context.new(
|
|
97
|
+
word: "fnix", # Should suggest "Phoenix"
|
|
98
|
+
dictionary: test_dict,
|
|
99
|
+
max_results: 5
|
|
100
|
+
)
|
|
101
|
+
|
|
102
|
+
# Add "phoenix" to dictionary for testing
|
|
103
|
+
test_dict.add_word("phoenix")
|
|
104
|
+
|
|
105
|
+
result3 = metaphone_strategy.generate(context3)
|
|
106
|
+
puts "Suggestions for 'fnix' (Metaphone):"
|
|
107
|
+
puts " #{result3.to_words.join(", ")}"
|
|
108
|
+
|
|
109
|
+
puts "\nMetaphone codes:"
|
|
110
|
+
puts " 'fnix' -> #{metaphone_strategy.send(:metaphone_code, "fnix")}"
|
|
111
|
+
puts " 'phoenix' -> #{metaphone_strategy.send(:metaphone_code, "phoenix")}"
|
|
112
|
+
puts " 'finish' -> #{metaphone_strategy.send(:metaphone_code, "finish")}"
|
|
113
|
+
|
|
114
|
+
puts
|
|
115
|
+
puts "=" * 40
|
|
116
|
+
puts
|
|
117
|
+
|
|
118
|
+
# Example 4: N-Gram Strategy
|
|
119
|
+
puts "4. N-Gram Strategy"
|
|
120
|
+
puts "-" * 40
|
|
121
|
+
|
|
122
|
+
ngram_strategy = Kotoshu::Suggestions::Strategies::NgramStrategy.new(
|
|
123
|
+
n: 2,
|
|
124
|
+
min_similarity: 0.2
|
|
125
|
+
)
|
|
126
|
+
|
|
127
|
+
context4 = Kotoshu::Suggestions::Context.new(
|
|
128
|
+
word: "tsting", # Should suggest "testing"
|
|
129
|
+
dictionary: test_dict,
|
|
130
|
+
max_results: 5
|
|
131
|
+
)
|
|
132
|
+
|
|
133
|
+
# Add "testing" to dictionary
|
|
134
|
+
test_dict.add_word("testing")
|
|
135
|
+
|
|
136
|
+
result4 = ngram_strategy.generate(context4)
|
|
137
|
+
puts "Suggestions for 'tsting' (N-Gram, n=2):"
|
|
138
|
+
puts " #{result4.to_words.join(", ")}"
|
|
139
|
+
|
|
140
|
+
puts
|
|
141
|
+
puts "=" * 40
|
|
142
|
+
puts
|
|
143
|
+
|
|
144
|
+
# Example 5: Composite Strategy (Pipeline)
|
|
145
|
+
puts "5. Composite Strategy (Pipeline)"
|
|
146
|
+
puts "-" * 40
|
|
147
|
+
|
|
148
|
+
# Build a pipeline with multiple strategies
|
|
149
|
+
pipeline = Kotoshu.suggestion_pipeline(
|
|
150
|
+
Kotoshu::Suggestions::Strategies::EditDistanceStrategy.new,
|
|
151
|
+
Kotoshu::Suggestions::Strategies::PhoneticStrategy.new,
|
|
152
|
+
Kotoshu::Suggestions::Strategies::NgramStrategy.new(n: 2)
|
|
153
|
+
)
|
|
154
|
+
|
|
155
|
+
context5 = Kotoshu::Suggestions::Context.new(
|
|
156
|
+
word: "wrld",
|
|
157
|
+
dictionary: test_dict,
|
|
158
|
+
max_results: 10
|
|
159
|
+
)
|
|
160
|
+
|
|
161
|
+
result5 = pipeline.generate(context5)
|
|
162
|
+
puts "Suggestions for 'wrld' (Composite Pipeline):"
|
|
163
|
+
puts " #{result5.to_words.join(", ")}"
|
|
164
|
+
|
|
165
|
+
puts
|
|
166
|
+
puts "Breakdown by source:"
|
|
167
|
+
result5.from_source(:edit_distance).each do |sugg|
|
|
168
|
+
puts " EditDistance: #{sugg.word} (distance: #{sugg.distance})"
|
|
169
|
+
end
|
|
170
|
+
result5.from_source(:phonetic).each do |sugg|
|
|
171
|
+
puts " Phonetic: #{sugg.word}"
|
|
172
|
+
end
|
|
173
|
+
result5.from_source(:ngram).each do |sugg|
|
|
174
|
+
puts " N-Gram: #{sugg.word}"
|
|
175
|
+
end
|
|
176
|
+
|
|
177
|
+
puts
|
|
178
|
+
puts "=" * 40
|
|
179
|
+
puts
|
|
180
|
+
|
|
181
|
+
# Example 6: Custom Strategy
|
|
182
|
+
puts "6. Custom Strategy"
|
|
183
|
+
puts "-" * 40
|
|
184
|
+
|
|
185
|
+
class PrefixStrategy < Kotoshu::Suggestions::Strategies::BaseStrategy
|
|
186
|
+
def generate(context)
|
|
187
|
+
word = context.word
|
|
188
|
+
dict_words = dictionary_words(context)
|
|
189
|
+
|
|
190
|
+
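# Find words sharing the input's prefix: all but its last character, but at least 3 characters (inputs of 3 characters or fewer are used whole)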
|
|
191
|
+
prefix_len = [word.length - 1, 3].max
|
|
192
|
+
prefix = word[0...prefix_len]
|
|
193
|
+
|
|
194
|
+
candidates = dict_words.select { |w| w.start_with?(prefix) && w != word }
|
|
195
|
+
create_suggestion_set(candidates)
|
|
196
|
+
end
|
|
197
|
+
end
|
|
198
|
+
|
|
199
|
+
prefix_strategy = PrefixStrategy.new(name: :prefix)
|
|
200
|
+
|
|
201
|
+
context6 = Kotoshu::Suggestions::Context.new(
|
|
202
|
+
word: "hel", # Incomplete word
|
|
203
|
+
dictionary: test_dict,
|
|
204
|
+
max_results: 10
|
|
205
|
+
)
|
|
206
|
+
|
|
207
|
+
result6 = prefix_strategy.generate(context6)
|
|
208
|
+
puts "Suggestions for 'hel' (Prefix-based):"
|
|
209
|
+
puts " #{result6.to_words.join(", ")}"
|
|
210
|
+
|
|
211
|
+
puts
|
|
212
|
+
puts "=" * 40
|
|
213
|
+
puts
|
|
214
|
+
|
|
215
|
+
# Example 7: Suggestion Generator
|
|
216
|
+
puts "7. Suggestion Generator (High-level API)"
|
|
217
|
+
puts "-" * 40
|
|
218
|
+
|
|
219
|
+
generator = Kotoshu::Suggestions::Generator.new(
|
|
220
|
+
test_dict,
|
|
221
|
+
max_suggestions: 10,
|
|
222
|
+
algorithms: [
|
|
223
|
+
Kotoshu::Suggestions::Strategies::EditDistanceStrategy,
|
|
224
|
+
Kotoshu::Suggestions::Strategies::PhoneticStrategy
|
|
225
|
+
]
|
|
226
|
+
)
|
|
227
|
+
|
|
228
|
+
puts "Generator configured with:"
|
|
229
|
+
puts " Dictionary: #{test_dict.size} words"
|
|
230
|
+
puts " Max suggestions: 10"
|
|
231
|
+
puts " Algorithms: EditDistanceStrategy, PhoneticStrategy"
|
|
232
|
+
puts
|
|
233
|
+
|
|
234
|
+
test_words = %w[helo wrld tsting fnix]
|
|
235
|
+
test_words.each do |word|
|
|
236
|
+
suggestions = generator.suggest(word)
|
|
237
|
+
puts "Suggestions for '#{word}':"
|
|
238
|
+
puts " #{suggestions.to_words.join(", ")}"
|
|
239
|
+
end
|
|
@@ -0,0 +1,287 @@
|
|
|
1
|
+
#!/usr/bin/env ruby
|
|
2
|
+
# frozen_string_literal: true
|
|
3
|
+
|
|
4
|
+
# Example 6: Configuration and Advanced Usage
|
|
5
|
+
#
|
|
6
|
+
# This example demonstrates how to configure Kotoshu and use
|
|
7
|
+
# advanced features like custom words, multiple languages, etc.
|
|
8
|
+
|
|
9
|
+
require_relative "../lib/kotoshu"
|
|
10
|
+
|
|
11
|
+
puts "=== Example 6: Configuration and Advanced Usage ==="
|
|
12
|
+
puts
|
|
13
|
+
|
|
14
|
+
# Example 1: Global Configuration
|
|
15
|
+
puts "1. Global Configuration"
|
|
16
|
+
puts "-" * 40
|
|
17
|
+
|
|
18
|
+
# Prefer the system word list, then a project-local copy; fall back to nil if neither file exists
|
|
19
|
+
dict_path = if File.exist?("/usr/share/dict/words")
|
|
20
|
+
"/usr/share/dict/words"
|
|
21
|
+
elsif File.exist?("dictionaries/unix_words/words")
|
|
22
|
+
"dictionaries/unix_words/words"
|
|
23
|
+
else
|
|
24
|
+
# Will use auto-detected system dictionary
|
|
25
|
+
nil
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
Kotoshu.configure do |config|
|
|
29
|
+
config.dictionary_type = :unix_words
|
|
30
|
+
config.dictionary_path = dict_path
|
|
31
|
+
config.language = "en-US"
|
|
32
|
+
config.max_suggestions = 15
|
|
33
|
+
config.case_sensitive = false
|
|
34
|
+
config.custom_words = %w[Kotoshu spellcheck]
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
puts "Configuration:"
|
|
38
|
+
config = Kotoshu.configuration
|
|
39
|
+
puts " Dictionary type: #{config.dictionary_type}"
|
|
40
|
+
puts " Dictionary path: #{config.dictionary_path}"
|
|
41
|
+
puts " Language: #{config.language}"
|
|
42
|
+
puts " Max suggestions: #{config.max_suggestions}"
|
|
43
|
+
puts " Case sensitive: #{config.case_sensitive}"
|
|
44
|
+
puts " Custom words: #{config.custom_words.inspect}"
|
|
45
|
+
puts
|
|
46
|
+
|
|
47
|
+
# Use the configured spellchecker
|
|
48
|
+
puts "Using configured spellchecker:"
|
|
49
|
+
puts " Has 'hello': #{Kotoshu.correct?("hello")}"
|
|
50
|
+
puts " Has 'Kotoshu': #{Kotoshu.correct?("Kotoshu")}"
|
|
51
|
+
suggestions = Kotoshu.suggest("helo")
|
|
52
|
+
puts " Suggestions for 'helo': #{suggestions.to_words.first(10).join(", ")}..."
|
|
53
|
+
|
|
54
|
+
puts
|
|
55
|
+
puts "=" * 40
|
|
56
|
+
puts
|
|
57
|
+
|
|
58
|
+
# Example 2: Spellchecker Instance with Custom Configuration
|
|
59
|
+
puts "2. Custom Spellchecker Instance"
|
|
60
|
+
puts "-" * 40
|
|
61
|
+
|
|
62
|
+
# Create a custom dictionary
|
|
63
|
+
custom_dict = Kotoshu::Dictionary::Custom.new(
|
|
64
|
+
words: %w[ruby gem rspec rake bundler],
|
|
65
|
+
language_code: "en"
|
|
66
|
+
)
|
|
67
|
+
|
|
68
|
+
# Create a spellchecker with the custom dictionary
|
|
69
|
+
custom_spellchecker = Kotoshu::Spellchecker.new(dictionary: custom_dict)
|
|
70
|
+
|
|
71
|
+
puts "Custom spellchecker with Ruby-related words:"
|
|
72
|
+
puts " Has 'ruby': #{custom_spellchecker.correct?("ruby")}"
|
|
73
|
+
puts " Has 'gem': #{custom_spellchecker.correct?("gem")}"
|
|
74
|
+
puts " Has 'rake': #{custom_spellchecker.correct?("rake")}"
|
|
75
|
+
puts " Has 'python': #{custom_spellchecker.correct?("python")}"
|
|
76
|
+
puts " Suggestions for 'rke': #{custom_spellchecker.suggest("rke").to_words.join(", ")}"
|
|
77
|
+
|
|
78
|
+
puts
|
|
79
|
+
puts "=" * 40
|
|
80
|
+
puts
|
|
81
|
+
|
|
82
|
+
# Example 3: Dictionary Repository
|
|
83
|
+
puts "3. Dictionary Repository"
|
|
84
|
+
puts "-" * 40
|
|
85
|
+
|
|
86
|
+
repo = Kotoshu::Dictionary::Repository.new
|
|
87
|
+
|
|
88
|
+
# Register multiple dictionaries
|
|
89
|
+
repo.register(:en_US, custom_dict)
|
|
90
|
+
repo.register(:programming, Kotoshu::Dictionary::PlainText.from_words(
|
|
91
|
+
%w[code function variable class module],
|
|
92
|
+
language_code: "en"
|
|
93
|
+
))
|
|
94
|
+
repo.register(:tech, Kotoshu::Dictionary::PlainText.from_words(
|
|
95
|
+
%w[computer software hardware internet api],
|
|
96
|
+
language_code: "en"
|
|
97
|
+
))
|
|
98
|
+
|
|
99
|
+
puts "Registered dictionaries:"
|
|
100
|
+
repo.each_key do |key|
|
|
101
|
+
dict = repo.get(key)
|
|
102
|
+
puts " #{key}: #{dict.size} words (#{dict.type})"
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
puts "\nFind by language 'en':"
|
|
106
|
+
found = repo.find_by_language("en")
|
|
107
|
+
found.each do |dict|
|
|
108
|
+
puts " #{dict.type}: #{dict.size} words"
|
|
109
|
+
end
|
|
110
|
+
|
|
111
|
+
puts
|
|
112
|
+
puts "=" * 40
|
|
113
|
+
puts
|
|
114
|
+
|
|
115
|
+
# Example 4: IndexedDictionary
|
|
116
|
+
puts "4. IndexedDictionary (Rich Query Interface)"
|
|
117
|
+
puts "-" * 40
|
|
118
|
+
|
|
119
|
+
index_dict = Kotoshu.dictionary(%w[
|
|
120
|
+
hello help held heap
|
|
121
|
+
world work word
|
|
122
|
+
test text toast
|
|
123
|
+
run running runner
|
|
124
|
+
code coding coded
|
|
125
|
+
])
|
|
126
|
+
|
|
127
|
+
puts "IndexedDictionary: #{index_dict.size} words"
|
|
128
|
+
puts
|
|
129
|
+
|
|
130
|
+
puts "Query methods:"
|
|
131
|
+
puts " Words starting with 'he': #{index_dict.find_by_prefix("he").inspect}"
|
|
132
|
+
puts " Words ending with 'ld': #{index_dict.find_by_suffix("ld").inspect}"
|
|
133
|
+
puts " Words with length 3: #{index_dict.find_by_length(3).inspect}"
|
|
134
|
+
puts " Words matching pattern 't.*t': #{index_dict.find_by_pattern(/t.*t/).inspect}"
|
|
135
|
+
puts
|
|
136
|
+
|
|
137
|
+
puts "Statistics:"
|
|
138
|
+
stats = index_dict.statistics
|
|
139
|
+
stats.each do |key, value|
|
|
140
|
+
puts " #{key}: #{value}"
|
|
141
|
+
end
|
|
142
|
+
|
|
143
|
+
puts
|
|
144
|
+
puts "=" * 40
|
|
145
|
+
puts
|
|
146
|
+
|
|
147
|
+
# Example 5: WordResult and DocumentResult
|
|
148
|
+
puts "5. Result Objects"
|
|
149
|
+
puts "-" * 40
|
|
150
|
+
|
|
151
|
+
# Check a word
|
|
152
|
+
word_result = Kotoshu.spellchecker.check_word("hello")
|
|
153
|
+
puts "WordResult for 'hello':"
|
|
154
|
+
puts " Word: #{word_result.word}"
|
|
155
|
+
puts " Correct: #{word_result.correct?}"
|
|
156
|
+
puts " Has suggestions: #{word_result.has_suggestions?}"
|
|
157
|
+
puts
|
|
158
|
+
|
|
159
|
+
word_result2 = Kotoshu.spellchecker.check_word("helo")
|
|
160
|
+
puts "WordResult for 'helo':"
|
|
161
|
+
puts " Word: #{word_result2.word}"
|
|
162
|
+
puts " Correct: #{word_result2.correct?}"
|
|
163
|
+
puts " Suggestion count: #{word_result2.suggestion_count}"
|
|
164
|
+
puts " First suggestion: #{word_result2.first_suggestion}"
|
|
165
|
+
puts " Top 3: #{word_result2.top_suggestions(3).join(", ")}"
|
|
166
|
+
puts
|
|
167
|
+
|
|
168
|
+
# Check text
|
|
169
|
+
text_result = Kotoshu.spellchecker.check("Hello wrold! This is a tst.")
|
|
170
|
+
puts "DocumentResult:"
|
|
171
|
+
puts " Success: #{text_result.success?}"
|
|
172
|
+
puts " Word count: #{text_result.word_count}"
|
|
173
|
+
puts " Error count: #{text_result.error_count}"
|
|
174
|
+
puts " Unique errors: #{text_result.unique_error_count}"
|
|
175
|
+
puts
|
|
176
|
+
puts " Errors:"
|
|
177
|
+
text_result.errors.each do |error|
|
|
178
|
+
suggestions_str = if error.has_suggestions?
|
|
179
|
+
" (suggestions: #{error.top_suggestions(2).join(", ")})"
|
|
180
|
+
else
|
|
181
|
+
""
|
|
182
|
+
end
|
|
183
|
+
puts " • #{error.word} at position #{error.position}#{suggestions_str}"
|
|
184
|
+
end
|
|
185
|
+
|
|
186
|
+
puts
|
|
187
|
+
puts "=" * 40
|
|
188
|
+
puts
|
|
189
|
+
|
|
190
|
+
# Example 6: Batch File Checking (multiple files)
|
|
191
|
+
puts "6. Batch File Checking"
|
|
192
|
+
puts "-" * 40
|
|
193
|
+
|
|
194
|
+
# Check multiple files
|
|
195
|
+
fixtures_dir = "spec/fixtures/documents"
|
|
196
|
+
if Dir.exist?(fixtures_dir)
|
|
197
|
+
files = Dir.glob(File.join(fixtures_dir, "*.txt"))
|
|
198
|
+
puts "Checking #{files.size} files..."
|
|
199
|
+
puts
|
|
200
|
+
|
|
201
|
+
files.each do |file|
|
|
202
|
+
result = Kotoshu.check_file(file)
|
|
203
|
+
status = result.success? ? "✓" : "✗"
|
|
204
|
+
filename = File.basename(file)
|
|
205
|
+
puts "#{status} #{filename}: #{result.error_count} error(s), #{result.word_count} words"
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
puts
|
|
209
|
+
|
|
210
|
+
# Get all results at once
|
|
211
|
+
results = Kotoshu.check_files(files)
|
|
212
|
+
total_errors = results.sum(&:error_count)
|
|
213
|
+
total_words = results.sum(&:word_count)
|
|
214
|
+
failed_count = results.count(&:failed?)
|
|
215
|
+
|
|
216
|
+
puts "Summary:"
|
|
217
|
+
puts " Files checked: #{files.size}"
|
|
218
|
+
puts " Files with errors: #{failed_count}"
|
|
219
|
+
puts " Total errors: #{total_errors}"
|
|
220
|
+
puts " Total words: #{total_words}"
|
|
221
|
+
end
|
|
222
|
+
|
|
223
|
+
puts
|
|
224
|
+
puts "=" * 40
|
|
225
|
+
puts
|
|
226
|
+
|
|
227
|
+
# Example 7: Error Handling
|
|
228
|
+
puts "7. Error Handling"
|
|
229
|
+
puts "-" * 40
|
|
230
|
+
|
|
231
|
+
begin
|
|
232
|
+
# Try to load a non-existent dictionary
|
|
233
|
+
bad_config = Kotoshu::Configuration.new(
|
|
234
|
+
dictionary_type: :plain_text,
|
|
235
|
+
dictionary_path: "/nonexistent/path.txt"
|
|
236
|
+
)
|
|
237
|
+
bad_config.load_dictionary
|
|
238
|
+
rescue Kotoshu::DictionaryNotFoundError => e
|
|
239
|
+
puts "Caught DictionaryNotFoundError:"
|
|
240
|
+
puts " Message: #{e.message}"
|
|
241
|
+
puts " Path: #{e.path}"
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
puts
|
|
245
|
+
|
|
246
|
+
begin
|
|
247
|
+
# Try to use an invalid dictionary type
|
|
248
|
+
bad_config2 = Kotoshu::Configuration.new(
|
|
249
|
+
dictionary_type: :invalid_type
|
|
250
|
+
)
|
|
251
|
+
bad_config2.load_dictionary
|
|
252
|
+
rescue Kotoshu::ConfigurationError => e
|
|
253
|
+
puts "Caught ConfigurationError:"
|
|
254
|
+
puts " Message: #{e.message}"
|
|
255
|
+
puts " Key: #{e.key.inspect}"
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
puts
|
|
259
|
+
puts "=" * 40
|
|
260
|
+
puts
|
|
261
|
+
|
|
262
|
+
# Example 8: Thread Safety (no threads are started here; this only shows that each instance is built with its own dictionary)
|
|
263
|
+
puts "8. Thread Safety"
|
|
264
|
+
puts "-" * 40
|
|
265
|
+
|
|
266
|
+
# Create two independent spellcheckers
|
|
267
|
+
spell1 = Kotoshu::Spellchecker.new(
|
|
268
|
+
dictionary: Kotoshu::Dictionary::Custom.new(
|
|
269
|
+
words: %w[hello world],
|
|
270
|
+
language_code: "en"
|
|
271
|
+
)
|
|
272
|
+
)
|
|
273
|
+
|
|
274
|
+
spell2 = Kotoshu::Spellchecker.new(
|
|
275
|
+
dictionary: Kotoshu::Dictionary::Custom.new(
|
|
276
|
+
words: %w[ruby python],
|
|
277
|
+
language_code: "en"
|
|
278
|
+
)
|
|
279
|
+
)
|
|
280
|
+
|
|
281
|
+
puts "Spellchecker 1 words: #{spell1.dictionary.words.inspect}"
|
|
282
|
+
puts "Spellchecker 2 words: #{spell2.dictionary.words.inspect}"
|
|
283
|
+
puts
|
|
284
|
+
puts "Spellchecker 1 has 'hello': #{spell1.correct?("hello")}"
|
|
285
|
+
puts "Spellchecker 1 has 'ruby': #{spell1.correct?("ruby")}"
|
|
286
|
+
puts "Spellchecker 2 has 'hello': #{spell2.correct?("hello")}"
|
|
287
|
+
puts "Spellchecker 2 has 'ruby': #{spell2.correct?("ruby")}"
|