kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,174 @@
1
+ # frozen_string_literal: true
2
+
3
module Kotoshu
  module Suggestions
    # A single spelling suggestion together with the data used to rank it.
    #
    # Instances are frozen at the end of construction. The freeze is shallow:
    # the object's attributes cannot be reassigned, but the +metadata+ hash
    # itself is not frozen here.
    #
    # Equality (+==+, +eql?+) and +hash+ consider only the downcased word, so
    # two suggestions for the same word collapse to a single Hash key even
    # when their distance, confidence or source differ.
    class Suggestion
      # Minimum confidence for {#high_confidence?} to be true.
      HIGH_CONFIDENCE_THRESHOLD = 0.8

      # Confidence strictly below this makes {#low_confidence?} true.
      LOW_CONFIDENCE_THRESHOLD = 0.5

      # Distances at or above this value all score 0 in {#combined_score}.
      MAX_MEANINGFUL_DISTANCE = 5

      attr_reader :word, :distance, :confidence, :source, :metadata

      # @param word [String] The suggested word
      # @param distance [Integer] Edit distance from original (lower is better)
      # @param confidence [Float] Confidence score (0.0 to 1.0, higher is better)
      # @param source [String, Symbol] The strategy that produced this suggestion
      # @param metadata [Hash] Any extra keyword arguments; {#<=>} reads the
      #   optional +:original_length+ and +:ngram_score+ keys from it
      def initialize(word:, distance: 0, confidence: 1.0, source: :unknown, **metadata)
        @word = word
        @distance = distance
        @confidence = confidence
        @source = source
        @metadata = metadata
        freeze
      end

      # Create a suggestion from a bare word with distance 0 and confidence 1.0.
      #
      # @param word [String] The word
      # @param source [String, Symbol] The source
      # @return [Suggestion] New suggestion
      def self.from_word(word, source: :unknown)
        new(word: word, distance: 0, confidence: 1.0, source: source)
      end

      # Check if this is a high-confidence suggestion.
      #
      # @return [Boolean] True if confidence >= 0.8
      def high_confidence?
        @confidence >= HIGH_CONFIDENCE_THRESHOLD
      end

      # Check if this is a low-confidence suggestion.
      #
      # @return [Boolean] True if confidence < 0.5
      def low_confidence?
        @confidence < LOW_CONFIDENCE_THRESHOLD
      end

      # Weighted blend of a distance-derived score and the confidence.
      #
      # The distance is capped at MAX_MEANINGFUL_DISTANCE and mapped linearly
      # so that distance 0 scores 1.0 and the cap scores 0.0.
      #
      # @param distance_weight [Float] Weight for distance (default: 0.3)
      # @param confidence_weight [Float] Weight for confidence (default: 0.7)
      # @return [Float] Combined score, higher is better. It lies in 0.0..1.0
      #   when the weights sum to 1, confidence is in 0.0..1.0 and distance is
      #   non-negative; none of these are enforced here.
      def combined_score(distance_weight: 0.3, confidence_weight: 0.7)
        capped_distance = [@distance, MAX_MEANINGFUL_DISTANCE].min
        distance_score = 1.0 - (capped_distance / MAX_MEANINGFUL_DISTANCE.to_f)

        (distance_score * distance_weight) + (@confidence * confidence_weight)
      end

      # Check if this suggestion is the same word as another.
      #
      # @param other [Suggestion, String] The other suggestion or word string
      #   (anything else is converted with +to_s+)
      # @return [Boolean] True if words match (case-insensitive)
      def same_word?(other)
        other_word = other.is_a?(Suggestion) ? other.word : other.to_s
        @word.downcase == other_word.downcase
      end

      # Check if this suggestion comes from a specific source.
      #
      # @param source [String, Symbol] The source to check
      # @return [Boolean] True if the stored source == the given source
      #   (no String/Symbol coercion is performed)
      def from_source?(source)
        @source == source
      end

      # Compare suggestions for sorting; the better suggestion sorts first.
      #
      # Ranking priority, each level used only when the previous ones tie:
      #   1. Combined score with default weights (higher is better)
      #   2. Edit distance (lower is better)
      #   3. Length similarity to the original word (smaller gap is better)
      #   4. N-gram score from metadata (higher is better)
      #   5. Downcased word, alphabetical (final tiebreaker only)
      #
      # @param other [Object] The object to compare against
      # @return [Integer, nil] -1, 0 or 1; nil when +other+ is not a
      #   Suggestion, following the usual Ruby +<=>+ convention
      def <=>(other)
        return nil unless other.is_a?(Suggestion)

        # Integer#nonzero? yields nil on a tie, so || falls through to the
        # next criterion. Operands are swapped where higher must sort first.
        (other.combined_score <=> combined_score).nonzero? ||
          (@distance <=> other.distance).nonzero? ||
          (length_gap <=> other.length_gap).nonzero? ||
          (other.ngram_score <=> ngram_score).nonzero? ||
          (@word.downcase <=> other.word.downcase)
      end

      # Check equality with another suggestion.
      #
      # @param other [Object] The other object
      # @return [Boolean] True if +other+ is a Suggestion for the same word
      #   (case-insensitive); distance, confidence and source are ignored
      def ==(other)
        return false unless other.is_a?(Suggestion)

        @word.downcase == other.word.downcase
      end
      alias eql? ==

      # Hash value for use in Hash keys; consistent with {#eql?}.
      #
      # @return [Integer] Hash code of the downcased word
      def hash
        @word.downcase.hash
      end

      # Convert suggestion to hash.
      #
      # Metadata entries are appended after the core fields. If a metadata key
      # collides with a core key (in practice only +:combined_score+ can, since
      # the other core names are captured by the constructor's keywords), the
      # core value wins so the hash always agrees with {#combined_score}.
      #
      # @return [Hash] Suggestion as hash
      def to_h
        core_fields = {
          word: @word,
          distance: @distance,
          confidence: @confidence,
          source: @source,
          combined_score: combined_score
        }
        core_fields.merge(@metadata) { |_key, core_value, _metadata_value| core_value }
      end

      # Convert suggestion to JSON-compatible hash.
      #
      # @return [Hash] Same value as {#to_h}; any arguments are ignored
      def as_json(*)
        to_h
      end

      # String representation.
      #
      # @return [String] String representation (confidence shown to 2 decimals)
      def to_s
        "Suggestion(word: '#{@word}', distance: #{@distance}, confidence: #{format("%.2f", @confidence)}, source: #{@source})"
      end

      # Inspect the suggestion.
      #
      # @return [String] Same as {#to_s}
      def inspect
        to_s
      end

      protected

      # Absolute difference between this word's length and the original word's
      # length. Falls back to the word's own length (gap 0) when
      # +:original_length+ is absent from metadata.
      #
      # @return [Integer]
      def length_gap
        (@word.length - (@metadata[:original_length] || @word.length)).abs
      end

      # Pre-computed n-gram score from metadata, or 0 when none was supplied.
      #
      # @return [Numeric]
      def ngram_score
        @metadata[:ngram_score] || 0
      end
    end
  end
end
@@ -0,0 +1,238 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "suggestion"
4
+
5
module Kotoshu
  module Suggestions
    # An ordered, size-limited collection of suggestions with query helpers.
    #
    # After construction and after every mutation the contents are sorted
    # with the elements' own +<=>+, de-duplicated by downcased word (the
    # best-ranked entry for each word is kept) and truncated to +max_size+.
    #
    # Note that several Enumerable methods are deliberately redefined here:
    # +include?+ matches by word, and +to_a+ returns hashes rather than the
    # suggestion objects.
    class SuggestionSet
      include Enumerable

      attr_reader :suggestions, :max_size

      # @param suggestions [Array<Suggestion>] Initial suggestions; the array
      #   is copied, so the caller's array is never reordered or modified
      # @param max_size [Integer] Maximum number of suggestions to keep
      def initialize(suggestions = [], max_size: 10)
        # Work on a private copy: normalising in place would silently sort and
        # de-duplicate the caller's array (and fail outright on a frozen one).
        @suggestions = suggestions.dup
        @max_size = max_size
        sort_and_limit!
      end

      # Create an empty suggestion set.
      #
      # @param max_size [Integer] Maximum size
      # @return [SuggestionSet] Empty set
      def self.empty(max_size: 10)
        new([], max_size: max_size)
      end

      # Create a suggestion set from an array of words.
      #
      # Each word is wrapped with +Suggestion.from_word+.
      #
      # @param words [Array<String>] Array of words
      # @param source [String, Symbol] The source
      # @param max_size [Integer] Maximum size
      # @return [SuggestionSet] New set
      def self.from_words(words, source: :unknown, max_size: 10)
        suggestions = words.map { |w| Suggestion.from_word(w, source: source) }
        new(suggestions, max_size: max_size)
      end

      # Add a suggestion to the set, then re-sort, de-duplicate and truncate.
      #
      # @param suggestion [Suggestion] The suggestion to add
      # @return [SuggestionSet] Self for chaining
      def add(suggestion)
        @suggestions << suggestion
        sort_and_limit!
        self
      end
      alias << add

      # Add multiple suggestions, then re-sort, de-duplicate and truncate.
      #
      # @param new_suggestions [Array<Suggestion>] Suggestions to add
      # @return [SuggestionSet] Self for chaining
      def concat(new_suggestions)
        @suggestions.concat(new_suggestions)
        sort_and_limit!
        self
      end

      # Merge another suggestion set into this one. The other set is not
      # modified.
      #
      # @param other [SuggestionSet] The other set
      # @return [SuggestionSet] Self for chaining
      def merge!(other)
        concat(other.suggestions)
      end

      # Get suggestions by source.
      #
      # @param source [String, Symbol] The source to filter by
      # @return [SuggestionSet] New set with filtered suggestions
      def from_source(source)
        subset(@suggestions.select { |s| s.from_source?(source) })
      end

      # Get high-confidence suggestions.
      #
      # @return [SuggestionSet] New set with high-confidence suggestions
      def high_confidence
        subset(@suggestions.select(&:high_confidence?))
      end

      # Get low-confidence suggestions.
      #
      # @return [SuggestionSet] New set with low-confidence suggestions
      def low_confidence
        subset(@suggestions.select(&:low_confidence?))
      end

      # Get suggestions within an inclusive distance range.
      #
      # @param min_distance [Integer] Minimum distance (inclusive)
      # @param max_distance [Integer] Maximum distance (inclusive)
      # @return [SuggestionSet] New set with filtered suggestions
      def within_distance(min_distance: 0, max_distance: 2)
        subset(@suggestions.select { |s| s.distance.between?(min_distance, max_distance) })
      end

      # Check if set contains a specific word.
      #
      # Matching is delegated to each element's +same_word?+.
      #
      # @param word [String] The word to check
      # @return [Boolean] True if word is in suggestions
      def include?(word)
        @suggestions.any? { |s| s.same_word?(word) }
      end
      alias has_word? include?

      # Find a suggestion by word.
      #
      # @param word [String] The word to find
      # @return [Suggestion, nil] The suggestion or nil
      def find_word(word)
        @suggestions.find { |s| s.same_word?(word) }
      end

      # Get the top N suggestions.
      #
      # @param n [Integer] Number of suggestions to get
      # @return [Array<Suggestion>] Top N suggestions
      def top(n)
        @suggestions.first(n)
      end

      # Get the first (best) suggestion, or the first +n+ suggestions.
      #
      # @param n [Integer, nil] Optional count, as in Enumerable#first
      # @return [Suggestion, nil, Array<Suggestion>] The best suggestion (nil
      #   when empty) if +n+ is omitted, otherwise an array of up to +n+
      def first(n = nil)
        n.nil? ? @suggestions.first : @suggestions.first(n)
      end

      # Get the last suggestion, or the last +n+ suggestions.
      #
      # @param n [Integer, nil] Optional count, as in Array#last
      # @return [Suggestion, nil, Array<Suggestion>] The last suggestion (nil
      #   when empty) if +n+ is omitted, otherwise an array of up to +n+
      def last(n = nil)
        n.nil? ? @suggestions.last : @suggestions.last(n)
      end

      # Check if the set is empty.
      #
      # @return [Boolean] True if no suggestions
      def empty?
        @suggestions.empty?
      end

      # Get the number of suggestions.
      #
      # @return [Integer] Number of suggestions
      def size
        @suggestions.size
      end
      alias length size

      # Count suggestions.
      #
      # With no arguments this equals {#size}. An item or a block is honoured
      # exactly as in Enumerable#count, so +count(&:high_confidence?)+ counts
      # only the matching suggestions.
      #
      # @return [Integer] Number of (matching) suggestions
      def count(*args, &block)
        @suggestions.count(*args, &block)
      end

      # Iterate over suggestions in ranked order.
      #
      # @yield [suggestion] Each suggestion
      # @return [Enumerator] Enumerator if no block given
      def each(&block)
        return enum_for(:each) unless block_given?

        @suggestions.each(&block)
      end

      # Get unique suggestions (by word, case-insensitive), keeping the first
      # occurrence of each word.
      #
      # @return [SuggestionSet] New set with unique suggestions
      def unique
        subset(@suggestions.uniq { |s| s.word.downcase })
      end

      # Convert to array of words.
      #
      # @return [Array<String>] Array of suggestion words
      def to_words
        @suggestions.map(&:word)
      end
      alias words to_words

      # Convert to array of hashes (one +to_h+ per suggestion).
      #
      # @return [Array<Hash>] Array of suggestion hashes
      def to_a
        @suggestions.map(&:to_h)
      end

      # Convert to JSON-compatible array.
      #
      # @return [Array<Hash>] Same value as {#to_a}; any arguments are ignored
      def as_json(*)
        to_a
      end

      # String representation.
      #
      # @return [String] String representation
      def to_s
        "SuggestionSet(size: #{size}, max_size: #{@max_size})"
      end

      # Inspect the suggestion set.
      #
      # @return [String] {#to_s}, followed by the word list when non-empty
      def inspect
        return to_s if @suggestions.empty?

        "#{self} [#{@suggestions.map(&:word).join(", ")}]"
      end

      private

      # Build a new set with the same max_size from an already-filtered list.
      #
      # @param filtered [Array<Suggestion>]
      # @return [SuggestionSet]
      def subset(filtered)
        SuggestionSet.new(filtered, max_size: @max_size)
      end

      # Sort with the elements' +<=>+, drop later duplicates of the same
      # downcased word, and keep at most max_size entries.
      def sort_and_limit!
        @suggestions = @suggestions.sort.uniq { |s| s.word.downcase }.first(@max_size)
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module Kotoshu
  # Version string of the kotoshu gem.
  VERSION = "0.3.0"
end