kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,263 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "word_result"
4
+
5
+ module Kotoshu
6
+ module Models
7
+ module Result
8
+ # Result object for checking a document or file.
9
+ #
10
+ # This is a value object that represents the result of checking
11
+ # an entire document for spelling errors.
12
+ #
13
+ # @note This class is immutable and frozen on initialization.
14
+ #
15
+ # @example Creating a successful document result
16
+ # result = DocumentResult.new(
17
+ # file: "README.md",
18
+ # errors: [],
19
+ # word_count: 150
20
+ # )
21
+ # result.success? # => true
22
+ # result.error_count # => 0
23
+ #
24
+ # @example Creating a result with errors
25
+ # errors = [WordResult.incorrect("helo"), WordResult.incorrect("wrold")]
26
+ # result = DocumentResult.new(
27
+ # file: "document.txt",
28
+ # errors: errors,
29
+ # word_count: 100
30
+ # )
31
+ # result.success? # => false
32
+ # result.error_count # => 2
33
class DocumentResult
  # @return [String, nil] The file path (if applicable)
  attr_reader :file

  # @return [Array<WordResult>] List of spelling errors found (frozen)
  attr_reader :errors

  # @return [Integer] Total word count
  attr_reader :word_count

  # @return [Hash] Additional metadata (frozen)
  attr_reader :metadata

  # Create a new DocumentResult.
  #
  # The instance and its own copies of +file+, +errors+ and +metadata+ are
  # frozen; the objects passed in by the caller are left untouched.
  #
  # @param file [String, nil] The file path (optional)
  # @param errors [Array<WordResult>, nil] List of errors (nil means none)
  # @param word_count [Integer] Total word count
  # @param metadata [Hash, nil] Additional metadata (nil means none)
  def initialize(file: nil, errors: [], word_count: 0, metadata: {})
    @file = file&.dup&.freeze
    # Copy before freezing so the caller's collections stay mutable. A nil
    # argument is treated as "nothing supplied" instead of raising
    # NoMethodError on nil.dup.freeze.
    @errors = (errors || []).dup.freeze
    @word_count = word_count
    @metadata = (metadata || {}).dup.freeze

    freeze
  end

  # Check if the document check was successful (no errors).
  #
  # @return [Boolean] True if no errors were found
  def success?
    @errors.empty?
  end

  # Check if the document check failed (has errors).
  #
  # @return [Boolean] True if errors were found
  def failed?
    !success?
  end

  # Get the number of errors found.
  #
  # @return [Integer] Error count
  def error_count
    @errors.size
  end

  # Get the number of unique errors (by word).
  #
  # @return [Integer] Unique error count
  def unique_error_count
    @errors.map(&:word).uniq.size
  end

  # Check if a specific word has an error.
  #
  # @param word [String] The word to check
  # @return [Boolean] True if the word has an error
  def has_error_for?(word)
    @errors.any? { |e| e.word == word }
  end

  # Get errors for a specific word.
  #
  # @param word [String] The word
  # @return [Array<WordResult>] Errors for the word
  def errors_for(word)
    @errors.select { |e| e.word == word }
  end

  # Iterate over errors.
  #
  # @yield [error] Each error
  # @return [Enumerator] Enumerator if no block given
  def each_error(&block)
    return enum_for(:each_error) unless block_given?

    @errors.each(&block)
  end

  # Iterate over unique error words.
  #
  # @yield [word, errors] Each unique word and its errors
  # @return [Enumerator] Enumerator if no block given
  def each_unique_error(&block)
    return enum_for(:each_unique_error) unless block_given?

    @errors.group_by(&:word).each(&block)
  end

  # Get the first N errors.
  #
  # @param n [Integer] Number of errors to return
  # @return [Array<WordResult>] First N errors
  def first_errors(n = 10)
    @errors.first(n)
  end

  # Get error summary as a hash mapping each word to its occurrence count.
  #
  # @return [Hash{String => Integer}] Summary of errors
  def error_summary
    # Hash.new(0) is kept on purpose: the returned hash answers 0 for words
    # that never occurred, which existing callers may rely on.
    @errors.each_with_object(Hash.new(0)) do |error, summary|
      summary[error.word] += 1
    end
  end

  # Convert to hash.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      file: @file,
      success: success?,
      word_count: @word_count,
      error_count: error_count,
      unique_error_count: unique_error_count,
      errors: @errors.map(&:to_h),
      error_summary: error_summary,
      metadata: @metadata
    }
  end

  # Convert to JSON-compatible hash.
  #
  # Any arguments are accepted and ignored so that encoders which call
  # +as_json(options)+ do not fail with ArgumentError.
  #
  # @return [Hash] JSON-compatible hash
  def as_json(*)
    {
      "file" => @file,
      "success" => success?,
      "wordCount" => @word_count,
      "errorCount" => error_count,
      "uniqueErrorCount" => unique_error_count,
      "errors" => @errors.map(&:as_json),
      "errorSummary" => error_summary,
      "metadata" => @metadata
    }
  end

  # Check equality based on file and errors.
  #
  # Word count and metadata do not take part in the comparison.
  #
  # @param other [Object] The other result
  # @return [Boolean] True if equal
  def ==(other)
    return false unless other.is_a?(DocumentResult)

    @file == other.file && @errors == other.errors
  end
  alias eql? ==

  # Hash based on file and errors (consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    [@file, @errors].hash
  end

  # String representation.
  #
  # @return [String] String representation
  def to_s
    summary =
      if success?
        "No spelling errors found (#{@word_count} words checked)"
      else
        "#{error_count} spelling error(s) found " \
          "(#{unique_error_count} unique) in #{@word_count} words"
      end

    # Only prepend the file label when there is a file; the previous
    # implementation left a stray leading space in the error case.
    if @file
      success? ? "File '#{@file}': #{summary}" : "File '#{@file}': #{summary}"
    else
      summary
    end
  end
  alias inspect to_s

  # Create a successful document result.
  #
  # @param file [String, nil] The file path (optional)
  # @param word_count [Integer] Total word count
  # @param metadata [Hash] Additional metadata (optional)
  # @return [DocumentResult] New result indicating success
  #
  # @example
  #   DocumentResult.success(file: "README.md", word_count: 150)
  def self.success(file: nil, word_count: 0, metadata: {})
    new(file: file, errors: [], word_count: word_count, metadata: metadata)
  end

  # Create a failed document result.
  #
  # @param file [String, nil] The file path (optional)
  # @param errors [Array<WordResult>] List of errors
  # @param word_count [Integer] Total word count
  # @param metadata [Hash] Additional metadata (optional)
  # @return [DocumentResult] New result carrying the given errors
  #
  # @example
  #   errors = [WordResult.incorrect("helo"), WordResult.incorrect("wrold")]
  #   DocumentResult.failure(file: "doc.txt", errors: errors, word_count: 100)
  def self.failure(file: nil, errors: [], word_count: 0, metadata: {})
    new(file: file, errors: errors, word_count: word_count, metadata: metadata)
  end

  # Merge multiple document results.
  #
  # Errors are concatenated in order and word counts are summed. The merged
  # result has no file and empty metadata.
  #
  # @param results [Array<DocumentResult>] Results to merge
  # @return [DocumentResult] Merged result
  #
  # @example Merging results from multiple files
  #   result1 = DocumentResult.new(file: "file1.txt", errors: [e1], word_count: 50)
  #   result2 = DocumentResult.new(file: "file2.txt", errors: [e2, e3], word_count: 75)
  #   DocumentResult.merge([result1, result2])
  #   # => DocumentResult with 3 errors and 125 words
  def self.merge(results)
    return new if results.empty?

    new(
      file: nil, # Merged results don't have a single file
      errors: results.flat_map(&:errors),
      word_count: results.sum(&:word_count)
    )
  end
end
261
+ end
262
+ end
263
+ end
@@ -0,0 +1,203 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "../../../suggestions/suggestion_set"
4
+
5
+ module Kotoshu
6
+ module Models
7
+ module Result
8
+ # Result object for checking a single word.
9
+ #
10
+ # This is a value object that represents the result of checking
11
+ # a single word for spelling errors, including any suggestions.
12
+ #
13
+ # @note This class is immutable and frozen on initialization.
14
+ #
15
+ # @example Creating a correct word result
16
+ # result = WordResult.new("hello", correct: true)
17
+ # result.correct? # => true
18
+ # result.word # => "hello"
19
+ # result.suggestions # => SuggestionSet.empty
20
+ #
21
+ # @example Creating an incorrect word result with suggestions
22
+ # suggestions = SuggestionSet.from_words(%w[hello help], source: :test)
23
+ # result = WordResult.new("helo", correct: false, suggestions: suggestions)
24
+ # result.correct? # => false
25
+ # result.has_suggestions? # => true
26
class WordResult
  # Maximum number of suggestion words included in #to_h / #as_json.
  SERIALIZED_SUGGESTION_LIMIT = 10

  # @return [String] The word that was checked
  attr_reader :word

  # @return [Boolean] Whether the word is spelled correctly
  attr_reader :correct

  # @return [Suggestions::SuggestionSet] Suggestions for correction
  attr_reader :suggestions

  # @return [Integer, nil] The position of the word in the source text (optional)
  attr_reader :position

  # @return [Hash] Additional metadata (frozen)
  attr_reader :metadata

  # Create a new WordResult.
  #
  # The instance and its own copies of +word+ and +metadata+ are frozen.
  #
  # @param word [String, nil] The word that was checked (nil becomes "")
  # @param correct [Boolean] Whether the word is correct
  # @param suggestions [Suggestions::SuggestionSet, nil] Suggestions (optional;
  #   defaults to an empty set)
  # @param position [Integer, nil] Position in source text (optional)
  # @param metadata [Hash, nil] Additional metadata (nil means none)
  def initialize(word, correct:, suggestions: nil, position: nil, metadata: {})
    @word = (word.nil? ? "" : word).dup.freeze
    @correct = correct
    @suggestions = suggestions || Suggestions::SuggestionSet.empty
    @position = position
    # nil metadata is treated as empty instead of raising NoMethodError.
    @metadata = (metadata || {}).dup.freeze

    freeze
  end

  # Check if the word is correct.
  #
  # @return [Boolean] True if the word is spelled correctly
  def correct?
    @correct
  end

  # Check if the word is incorrect.
  #
  # @return [Boolean] True if the word is misspelled
  def incorrect?
    !@correct
  end

  # Check if there are suggestions.
  #
  # @return [Boolean] True if suggestions are available
  def has_suggestions?
    !@suggestions.empty?
  end

  # Get the number of suggestions.
  #
  # @return [Integer] Number of suggestions
  def suggestion_count
    @suggestions.size
  end

  # Get the top N suggestions.
  #
  # @param n [Integer] Number of suggestions to return
  # @return [Array<String>] Top N suggestion words
  def top_suggestions(n = 3)
    @suggestions.top(n).map(&:word)
  end

  # Get the first (best) suggestion.
  #
  # @return [String, nil] The best suggestion or nil
  def first_suggestion
    @suggestions.first&.word
  end

  # Convert to hash.
  #
  # Only the top suggestion words are included, not the full set.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      word: @word,
      correct: @correct,
      position: @position,
      suggestion_count: suggestion_count,
      suggestions: top_suggestions(SERIALIZED_SUGGESTION_LIMIT),
      metadata: @metadata
    }
  end

  # Convert to JSON-compatible hash.
  #
  # Any arguments are accepted and ignored so that encoders which call
  # +as_json(options)+ do not fail with ArgumentError.
  #
  # @return [Hash] JSON-compatible hash
  def as_json(*)
    {
      "word" => @word,
      "correct" => @correct,
      "position" => @position,
      "suggestionCount" => suggestion_count,
      "suggestions" => top_suggestions(SERIALIZED_SUGGESTION_LIMIT),
      "metadata" => @metadata
    }
  end

  # Check equality based on word and correctness.
  #
  # Position, suggestions and metadata do not take part in the comparison.
  #
  # @param other [Object] The other result
  # @return [Boolean] True if equal
  def ==(other)
    return false unless other.is_a?(WordResult)

    @word == other.word && @correct == other.correct
  end
  alias eql? ==

  # Hash based on word and correctness (consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    [@word, @correct].hash
  end

  # String representation.
  #
  # @return [String] String representation
  def to_s
    return @word if @correct
    return "#{@word} (no suggestions)" unless has_suggestions?

    "#{@word} (did you mean #{top_suggestions(3).join(", ")}?)"
  end
  alias inspect to_s

  # Create a correct word result.
  #
  # @param word [String] The word
  # @param position [Integer, nil] Position in source (optional)
  # @param metadata [Hash] Additional metadata (optional)
  # @return [WordResult] New result indicating correct spelling
  #
  # @example
  #   WordResult.correct("hello")
  def self.correct(word, position: nil, metadata: {})
    new(word, correct: true, position: position, metadata: metadata)
  end

  # Create an incorrect word result with suggestions.
  #
  # @param word [String] The misspelled word
  # @param suggestions [Suggestions::SuggestionSet, Array<String>, nil]
  #   Suggestions; an Array is converted with +SuggestionSet.from_words+,
  #   anything else yields an empty set
  # @param position [Integer, nil] Position in source (optional)
  # @param metadata [Hash] Additional metadata (optional)
  # @return [WordResult] New result indicating incorrect spelling
  #
  # @example With SuggestionSet
  #   suggestions = SuggestionSet.from_words(%w[hello help], source: :test)
  #   WordResult.incorrect("helo", suggestions: suggestions)
  #
  # @example With array of words
  #   WordResult.incorrect("helo", suggestions: %w[hello help])
  def self.incorrect(word, suggestions: nil, position: nil, metadata: {})
    suggestion_set =
      case suggestions
      when Suggestions::SuggestionSet then suggestions
      when Array then Suggestions::SuggestionSet.from_words(suggestions, source: :default)
      else Suggestions::SuggestionSet.empty
      end

    new(word, correct: false, suggestions: suggestion_set, position: position, metadata: metadata)
  end
end
201
+ end
202
+ end
203
+ end
@@ -0,0 +1,142 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kotoshu
4
+ module Models
5
+ # Word model representing a dictionary word with metadata.
6
+ #
7
+ # This is a value object that represents a word in the dictionary
8
+ # along with its morphological information (flags and data).
9
+ #
10
+ # @note This class is immutable and frozen on initialization.
11
+ #
12
+ # @example Creating a word
13
+ # word = Models::Word.new("hello", flags: ["noun"], morphological_data: { root: "hell" })
14
+ # word.text # => "hello"
15
+ # word.valid? # => true
16
class Word
  # Ordering helpers (<, >, between?, clamp, ...) derived from #<=>.
  # #== is defined explicitly below and takes precedence over Comparable#==.
  include Comparable

  # @return [String] The word text
  attr_reader :text

  # @return [Array<String>] Morphological flags (e.g., "noun", "verb")
  attr_reader :flags

  # @return [Hash] Additional morphological data
  attr_reader :morphological_data

  # Create a new Word.
  #
  # The instance and its own copies of the arguments are frozen.
  #
  # @param text [String] The word text
  # @param flags [Array<String>] Morphological flags (optional)
  # @param morphological_data [Hash] Additional morphological data (optional)
  # @raise [ArgumentError] If +text+ is nil or empty
  def initialize(text, flags: [], morphological_data: {})
    raise ArgumentError, "Text cannot be empty" if text.nil? || text.empty?

    @text = text.dup.freeze
    @flags = flags.dup.freeze
    @morphological_data = morphological_data.dup.freeze

    freeze
  end

  # Check if the word is valid (has content).
  #
  # @return [Boolean] True if the word is valid
  def valid?
    !@text.nil? && !@text.empty?
  end

  # Check if the word has a specific flag.
  #
  # @param flag [String] The flag to check
  # @return [Boolean] True if the word has the flag
  def has_flag?(flag)
    @flags.include?(flag)
  end

  # Check if the word has any flags.
  #
  # @return [Boolean] True if the word has flags
  def has_flags?
    !@flags.empty?
  end

  # Get the length of the word.
  #
  # @return [Integer] Word length
  def length
    @text.length
  end

  # Check if the word is empty.
  #
  # @return [Boolean] True if the word is empty
  def empty?
    @text.empty?
  end

  # Convert to string.
  #
  # @return [String] The word text
  def to_s
    @text
  end

  # Convert to hash.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      text: @text,
      flags: @flags,
      morphological_data: @morphological_data
    }
  end

  # Check equality based on text.
  #
  # Only another Word can be equal; a plain String with the same text is not.
  #
  # @param other [Object] The other object
  # @return [Boolean] True if +other+ is a Word with the same text
  def ==(other)
    return false unless other.is_a?(Word)

    @text == other.text
  end
  alias eql? ==

  # Hash based on text (consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    @text.hash
  end

  # Compare words by text.
  #
  # @param other [Object] The other word
  # @return [Integer, nil] Comparison result, or nil if +other+ is not a Word
  def <=>(other)
    return nil unless other.is_a?(Word)

    @text <=> other.text
  end

  # Create a word from a Hunspell dictionary line.
  #
  # Surrounding whitespace (including the trailing newline of a line read
  # from a file) is removed first. Everything after the first "/" is treated
  # as a run of single-character flags.
  #
  # @param line [String, nil] Dictionary line (e.g., "hello/flag" or "hello")
  # @return [Word, nil] New word instance, or nil for a nil/blank line
  # @raise [ArgumentError] If the line has flags but no word text (e.g. "/N")
  #
  # @example
  #   Word.from_dic_line("hello/N")  # => Word with text "hello" and flag "N"
  #   Word.from_dic_line("hello")    # => Word with text "hello" and no flags
  #   Word.from_dic_line("\n")       # => nil
  def self.from_dic_line(line)
    entry = line&.strip
    return nil if entry.nil? || entry.empty?

    text, flag_string = entry.split("/", 2)
    new(text, flags: flag_string ? flag_string.chars : [])
  end
end
141
+ end
142
+ end