kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,119 @@
1
# frozen_string_literal: true

module Kotoshu
  module Core
    module Trie
      # Fluent builder that accumulates words into a Trie.
      #
      # Instance-level +add_*+ / +from_*+ methods return the builder itself so
      # calls can be chained; #build hands back the accumulated trie. The
      # class-level +from_*+ methods are one-shot shortcuts equivalent to
      # +new.from_*(source).build+.
      class Builder
        def initialize
          @trie = Trie.new
        end

        # Add a single word to the trie.
        #
        # @param word [String] The word to add
        # @param payload [Object] Optional payload stored with the word
        # @return [Builder] Self for chaining
        def add_word(word, payload = nil)
          @trie.insert(word, payload)
          self
        end
        alias << add_word

        # Add several words, none of them carrying a payload.
        #
        # @param words [Array<String>] Words to add (anything responding to #each)
        # @return [Builder] Self for chaining
        def add_words(words)
          words.each { |word| add_word(word) }
          self
        end

        # Add every entry of a word => payload mapping.
        #
        # @param hash [Hash] Hash of words to payloads
        # @return [Builder] Self for chaining
        def from_hash(hash)
          hash.each { |word, payload| add_word(word, payload) }
          self
        end

        # Add every word of an array (same as #add_words).
        #
        # @param array [Array<String>] Array of words
        # @return [Builder] Self for chaining
        def from_array(array)
          add_words(array)
        end

        # Add the words listed in a file, one word per line.
        #
        # Lines are normalised exactly like in #from_string: surrounding
        # whitespace is stripped, and blank lines and lines whose first
        # non-blank character is "#" are skipped.
        #
        # @param path [String] Path to the file
        # @return [Builder] Self for chaining
        def from_file(path)
          File.foreach(path, chomp: true) { |line| add_line(line) }
          self
        end

        # Add the words listed in a newline-separated string.
        #
        # Surrounding whitespace is stripped from every line; blank lines and
        # lines starting with "#" are skipped.
        #
        # @param text [String] String containing words
        # @return [Builder] Self for chaining
        def from_string(text)
          text.each_line { |line| add_line(line) }
          self
        end

        # Return the accumulated trie.
        #
        # The trie object is frozen before it is returned. Object#freeze is
        # shallow and the builder keeps a reference to the same object, so a
        # builder should not be reused once #build has been called.
        #
        # @return [Trie] The constructed (frozen) trie
        def build
          @trie.freeze
        end

        # Build a trie from a file path (class method).
        #
        # @param path [String] Path to the file
        # @return [Trie] The constructed trie
        def self.from_file(path)
          new.from_file(path).build
        end

        # Build a trie from an array of words (class method).
        #
        # @param words [Array<String>] Array of words
        # @return [Trie] The constructed trie
        def self.from_array(words)
          new.from_array(words).build
        end

        # Build a trie from a hash (class method).
        #
        # @param hash [Hash] Hash of words to payloads
        # @return [Trie] The constructed trie
        def self.from_hash(hash)
          new.from_hash(hash).build
        end

        # Build a trie from a string (class method).
        #
        # @param text [String] String containing words
        # @return [Trie] The constructed trie
        def self.from_string(text)
          new.from_string(text).build
        end

        private

        # Normalise one raw input line and add it unless it is blank or a
        # comment. Shared by #from_file and #from_string so both sources are
        # interpreted identically.
        #
        # @param line [String] Raw line, with or without its line terminator
        # @return [void]
        def add_line(line)
          word = line.strip
          return if word.empty? || word.start_with?("#")

          add_word(word)
        end
      end
    end
  end
end
@@ -0,0 +1,94 @@
1
# frozen_string_literal: true

module Kotoshu
  module Core
    module Trie
      # A single vertex of the prefix tree.
      #
      # A node keeps the character it was created for, a Hash of child nodes
      # keyed by character, a flag telling whether a word ends at this node,
      # and an optional payload recorded when the node is marked terminal.
      class Node
        attr_reader :character, :children, :terminal, :payload

        # @param character [String] Character this node stands for
        #   (defaults to the empty string)
        def initialize(character = "")
          @payload = nil
          @terminal = false
          @children = {}
          @character = character
        end

        # Fetch the child for +character+, creating it on first use.
        #
        # @param character [String] The character to add
        # @return [Node] The existing child, or the newly created one
        def add_child(character)
          existing = @children[character]
          return existing if existing

          @children[character] = Node.new(character)
        end

        # Look up the child stored under +character+.
        #
        # @param character [String] The character to look up
        # @return [Node, nil] The child node, or nil when there is none
        def child(character)
          @children[character]
        end

        # Tell whether a child is stored under +character+.
        #
        # @param character [String] The character to check
        # @return [Boolean] True if such a child exists
        def has_child?(character)
          @children.include?(character)
        end

        # Flag this node as the end of a word and record its payload
        # (replacing any payload stored earlier).
        #
        # @param payload [Object] Optional payload to store at this node
        # @return [Object] The payload that was stored
        def mark_terminal(payload = nil)
          @terminal = true
          @payload = payload
        end

        # Tell whether a word ends at this node.
        #
        # @return [Boolean] True if this is the end of a word
        def terminal?
          @terminal
        end

        # Expose the child table itself (not a copy).
        #
        # @return [Hash] Hash of character to node mappings
        def all_children
          @children
        end

        # Tell whether at least one child exists.
        #
        # @return [Boolean] True if there are children
        def has_children?
          @children.size.positive?
        end

        # Number of direct children.
        #
        # @return [Integer] Number of child nodes
        def child_count
          @children.length
        end

        # Human-readable summary: character, terminal flag and child keys.
        #
        # @return [String] String representation
        def to_s
          labels = @children.keys
          "Node('#{@character}', terminal: #{@terminal}, children: #{labels})"
        end

        # Same text as #to_s, so nodes print compactly in consoles and logs.
        #
        # @return [String] Inspection string
        def inspect
          to_s
        end
      end
    end
  end
end
@@ -0,0 +1,249 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kotoshu
4
+ module Core
5
+ module Trie
6
+ # Trie (prefix tree) data structure for efficient word storage and lookup.
7
+ # Supports prefix matching, word validation, and traversal.
8
+ class Trie
9
+ attr_reader :root, :size
10
+
11
+ def initialize
12
+ @root = Node.new
13
+ @size = 0
14
+ end
15
+
16
+ # Insert a word into the trie.
17
+ #
18
+ # @param word [String] The word to insert
19
+ # @param payload [Object] Optional payload to store with the word
20
+ # @return [Trie] Self for chaining
21
+ def insert(word, payload = nil)
22
+ node = @root
23
+ word.each_char do |char|
24
+ node = node.add_child(char)
25
+ end
26
+
27
+ # Only increment size if this is a new word
28
+ @size += 1 unless node.terminal?
29
+ node.mark_terminal(payload)
30
+
31
+ self
32
+ end
33
+
34
+ # Check if a word exists in the trie.
35
+ #
36
+ # @param word [String] The word to look up
37
+ # @return [Boolean] True if the word exists
38
+ def lookup(word)
39
+ node = find_node(word)
40
+ !node.nil? && node.terminal?
41
+ end
42
+ alias has_word? lookup
43
+ alias contains? lookup
44
+
45
+ # Check if any words in the trie start with the given prefix.
46
+ #
47
+ # @param prefix [String] The prefix to check
48
+ # @return [Boolean] True if any words have this prefix
49
+ def has_prefix?(prefix)
50
+ !find_node(prefix).nil?
51
+ end
52
+
53
+ # Get the node for a given word/prefix.
54
+ #
55
+ # @param word [String] The word or prefix to find
56
+ # @return [Node, nil] The node or nil if not found
57
+ def find_node(word)
58
+ node = @root
59
+ word.each_char do |char|
60
+ return nil unless node.has_child?(char)
61
+
62
+ node = node.child(char)
63
+ end
64
+ node
65
+ end
66
+
67
+ # Get all words with the given prefix.
68
+ #
69
+ # @param prefix [String] The prefix to match
70
+ # @return [Array<String>] Array of words with the prefix
71
+ def words_with_prefix(prefix)
72
+ start_node = find_node(prefix)
73
+ return [] if start_node.nil?
74
+
75
+ words = []
76
+ collect_words(start_node, prefix, words)
77
+ words
78
+ end
79
+
80
+ # Get all words in the trie.
81
+ #
82
+ # @return [Array<String>] Array of all words
83
+ def all_words
84
+ words = []
85
+ collect_words(@root, "", words)
86
+ words
87
+ end
88
+
89
+ # Count words with the given prefix.
90
+ #
91
+ # @param prefix [String] The prefix to count
92
+ # @return [Integer] Number of words with the prefix
93
+ def count_prefix(prefix)
94
+ words_with_prefix(prefix).size
95
+ end
96
+
97
+ # Get suggestions for a word based on prefix matching.
98
+ # Returns words that share the longest common prefix.
99
+ #
100
+ # @param word [String] The word to get suggestions for
101
+ # @param max_results [Integer] Maximum number of results
102
+ # @return [Array<String>] Array of suggested words
103
+ def suggestions(word, max_results: 10)
104
+ # Find the longest matching prefix
105
+ node = @root
106
+ i = 0
107
+
108
+ while i < word.length && node.has_child?(word[i])
109
+ node = node.child(word[i])
110
+ i += 1
111
+ end
112
+
113
+ # Collect all completions from this point
114
+ words = []
115
+ collect_words_limited(node, word[0...i], words, max_results)
116
+ words
117
+ end
118
+
119
+ # Iterate over all words in the trie.
120
+ #
121
+ # @yield [word, payload] Each word and its optional payload
122
+ # @return [Enumerator] Enumerator if no block given
123
+ def each_word
124
+ return enum_for(:each_word) unless block_given?
125
+
126
+ traverse(@root, "") do |word, node|
127
+ yield word, node.payload if node.terminal?
128
+ end
129
+
130
+ self
131
+ end
132
+
133
+ # Traverse the trie with a visitor.
134
+ #
135
+ # @yield [prefix, node] Each prefix and node visited
136
+ # @return [Trie] Self for chaining
137
+ def traverse(node = @root, prefix = "", &block)
138
+ return enum_for(:traverse, node, prefix) unless block_given?
139
+
140
+ yield prefix, node
141
+
142
+ node.all_children.each_value do |child|
143
+ traverse(child, prefix + child.character, &block)
144
+ end
145
+
146
+ self
147
+ end
148
+
149
+ # Check if the trie is empty.
150
+ #
151
+ # @return [Boolean] True if trie has no words
152
+ def empty?
153
+ @size.zero?
154
+ end
155
+
156
+ # Clear all words from the trie.
157
+ #
158
+ # @return [Trie] Self for chaining
159
+ def clear
160
+ @root = Node.new
161
+ @size = 0
162
+ self
163
+ end
164
+
165
+ # Merge another trie into this one.
166
+ #
167
+ # @param other [Trie] The trie to merge
168
+ # @return [Trie] Self for chaining
169
+ def merge!(other)
170
+ other.each_word do |word, payload|
171
+ insert(word, payload)
172
+ end
173
+ self
174
+ end
175
+
176
+ # Create a new trie with common words from two tries.
177
+ #
178
+ # @param other [Trie] The other trie
179
+ # @return [Trie] New trie with common words
180
+ def &(other)
181
+ result = Trie.new
182
+ each_word do |word, _payload|
183
+ result.insert(word) if other.lookup(word)
184
+ end
185
+ result
186
+ end
187
+
188
+ # Create a new trie with words from either trie.
189
+ #
190
+ # @param other [Trie] The other trie
191
+ # @return [Trie] New trie with all words
192
+ def |(other)
193
+ result = Trie.new
194
+ each_word { |word, payload| result.insert(word, payload) }
195
+ other.each_word { |word, payload| result.insert(word, payload) }
196
+ result
197
+ end
198
+
199
+ # Convert trie to string representation.
200
+ #
201
+ # @return [String] String representation
202
+ def to_s
203
+ "Trie(size: #{@size})"
204
+ end
205
+
206
+ # Inspect the trie.
207
+ #
208
+ # @return [String] Inspection string
209
+ def inspect
210
+ to_s
211
+ end
212
+
213
+ private
214
+
215
+ # Collect all words from a given node.
216
+ #
217
+ # @param node [Node] The starting node
218
+ # @param prefix [String] The current prefix
219
+ # @param words [Array] Array to collect words into
220
+ def collect_words(node, prefix, words)
221
+ words << prefix if node.terminal?
222
+
223
+ node.all_children.each do |char, child|
224
+ collect_words(child, prefix + char, words)
225
+ end
226
+ end
227
+
228
+ # Collect words with a limit.
229
+ #
230
+ # @param node [Node] The starting node
231
+ # @param prefix [String] The current prefix
232
+ # @param words [Array] Array to collect words into
233
+ # @param limit [Integer] Maximum number of words to collect
234
+ def collect_words_limited(node, prefix, words, limit)
235
+ return if words.size >= limit
236
+
237
+ words << prefix if node.terminal?
238
+
239
+ return if words.size >= limit
240
+
241
+ node.all_children.each_value do |child|
242
+ collect_words_limited(child, prefix + child.character, words, limit)
243
+ break if words.size >= limit
244
+ end
245
+ end
246
+ end
247
+ end
248
+ end
249
+ end
@@ -0,0 +1,28 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kotoshu
4
+ # Namespace for the core domain models and infrastructure.
5
+ #
6
+ # The comment below lists what lives under Core for the spell checker:
7
+ # - IndexedDictionary: Fast word lookup with multiple indexes
8
+ # - Trie: Prefix tree (Core::Trie::Trie) with its Node and Builder classes
9
+ # - Models: Value objects and result types (not required by this file; presumably loaded elsewhere - verify)
10
+ #
11
+ # @example Creating an indexed dictionary
12
+ # dict = Kotoshu::Core::IndexedDictionary.new(%w[hello world test])
13
+ # dict.include?("hello") # => true
14
+ #
15
+ # @example Creating a trie (note the class is nested inside the Trie module)
16
+ # trie = Kotoshu::Core::Trie::Trie.new
17
+ # trie.insert("hello")
18
+ # trie.lookup("hello") # => true
19
+ module Core
20
+ end
21
+ end
22
+
23
+ # Require core submodules
24
+ require_relative "core/exceptions"
25
+ require_relative "core/indexed_dictionary"
26
+ require_relative "core/trie/trie"
27
+ require_relative "core/trie/builder"
28
+ require_relative "core/trie/node"