kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,263 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "word_result"
|
|
4
|
+
|
|
5
|
+
module Kotoshu
|
|
6
|
+
module Models
|
|
7
|
+
module Result
|
|
8
|
+
# Result object for checking a document or file.
|
|
9
|
+
#
|
|
10
|
+
# This is a value object that represents the result of checking
|
|
11
|
+
# an entire document for spelling errors.
|
|
12
|
+
#
|
|
13
|
+
# @note This class is immutable and frozen on initialization.
|
|
14
|
+
#
|
|
15
|
+
# @example Creating a successful document result
|
|
16
|
+
# result = DocumentResult.new(
|
|
17
|
+
# file: "README.md",
|
|
18
|
+
# errors: [],
|
|
19
|
+
# word_count: 150
|
|
20
|
+
# )
|
|
21
|
+
# result.success? # => true
|
|
22
|
+
# result.error_count # => 0
|
|
23
|
+
#
|
|
24
|
+
# @example Creating a result with errors
|
|
25
|
+
# errors = [WordResult.incorrect("helo"), WordResult.incorrect("wrold")]
|
|
26
|
+
# result = DocumentResult.new(
|
|
27
|
+
# file: "document.txt",
|
|
28
|
+
# errors: errors,
|
|
29
|
+
# word_count: 100
|
|
30
|
+
# )
|
|
31
|
+
# result.success? # => false
|
|
32
|
+
# result.error_count # => 2
|
|
33
|
+
class DocumentResult
  # @return [String, nil] The file path (if applicable)
  attr_reader :file

  # @return [Array<WordResult>] List of spelling errors found (frozen copy)
  attr_reader :errors

  # @return [Integer] Total word count
  attr_reader :word_count

  # @return [Hash] Additional metadata (frozen copy)
  attr_reader :metadata

  # Create a new DocumentResult.
  #
  # The instance and its collections are frozen; the caller's own
  # array/hash are copied, not frozen in place.
  #
  # @param file [String, nil] The file path (optional)
  # @param errors [Array<WordResult>, nil] List of errors (nil is treated as empty)
  # @param word_count [Integer] Total word count
  # @param metadata [Hash, nil] Additional metadata (nil is treated as empty)
  def initialize(file: nil, errors: [], word_count: 0, metadata: {})
    @file = file&.dup&.freeze
    # Normalise nil to an empty collection so that the predicates and
    # counters below never have to deal with a nil receiver.
    @errors = (errors || []).dup.freeze
    @word_count = word_count
    @metadata = (metadata || {}).dup.freeze

    freeze
  end

  # Check if the document check was successful (no errors).
  #
  # @return [Boolean] True if no errors were found
  def success?
    @errors.empty?
  end

  # Check if the document check failed (has errors).
  #
  # @return [Boolean] True if errors were found
  def failed?
    !success?
  end

  # Get the number of errors found.
  #
  # @return [Integer] Error count
  def error_count
    @errors.size
  end

  # Get the number of unique errors (by word).
  #
  # @return [Integer] Unique error count
  def unique_error_count
    @errors.map(&:word).uniq.size
  end

  # Check if a specific word has an error.
  #
  # @param word [String] The word to check
  # @return [Boolean] True if the word has an error
  def has_error_for?(word)
    @errors.any? { |e| e.word == word }
  end

  # Get errors for a specific word.
  #
  # @param word [String] The word
  # @return [Array<WordResult>] Errors for the word
  def errors_for(word)
    @errors.select { |e| e.word == word }
  end

  # Iterate over errors.
  #
  # @yield [error] Each error
  # @return [Enumerator] Enumerator if no block given
  def each_error(&block)
    return enum_for(:each_error) unless block_given?

    @errors.each(&block)
  end

  # Iterate over unique error words.
  #
  # @yield [word, errors] Each unique word and its errors
  # @return [Enumerator] Enumerator if no block given
  def each_unique_error(&block)
    return enum_for(:each_unique_error) unless block_given?

    @errors.group_by(&:word).each(&block)
  end

  # Get the first N errors.
  #
  # @param n [Integer] Number of errors to return
  # @return [Array<WordResult>] First N errors
  def first_errors(n = 10)
    @errors.first(n)
  end

  # Get error summary as a hash.
  #
  # @return [Hash{String => Integer}] Occurrence count per misspelled word;
  #   looking up a word that is not present yields 0
  def error_summary
    @errors.each_with_object(Hash.new(0)) do |error, summary|
      summary[error.word] += 1
    end
  end

  # Convert to hash.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      file: @file,
      success: success?,
      word_count: @word_count,
      error_count: error_count,
      unique_error_count: unique_error_count,
      errors: @errors.map(&:to_h),
      error_summary: error_summary,
      metadata: @metadata
    }
  end

  # Convert to JSON-compatible hash.
  #
  # @return [Hash] JSON-compatible hash
  def as_json
    {
      "file" => @file,
      "success" => success?,
      "wordCount" => @word_count,
      "errorCount" => error_count,
      "uniqueErrorCount" => unique_error_count,
      "errors" => @errors.map(&:as_json),
      "errorSummary" => error_summary,
      "metadata" => @metadata
    }
  end

  # Check equality based on file and errors.
  #
  # Word count and metadata do not take part in the comparison.
  #
  # @param other [Object] The other result
  # @return [Boolean] True if equal
  def ==(other)
    return false unless other.is_a?(DocumentResult)

    @file == other.file && @errors == other.errors
  end
  alias eql? ==

  # Hash based on file and errors (kept consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    [@file, @errors].hash
  end

  # String representation.
  #
  # The "File '...':" prefix is only added when a file is known, so a
  # result without a file never starts with a stray space.
  #
  # @return [String] String representation
  def to_s
    summary = if success?
                "No spelling errors found (#{@word_count} words checked)"
              else
                "#{error_count} spelling error(s) found " \
                  "(#{unique_error_count} unique) in #{@word_count} words"
              end

    @file ? "File '#{@file}': #{summary}" : summary
  end
  alias inspect to_s

  # Create a successful document result.
  #
  # @param file [String, nil] The file path (optional)
  # @param word_count [Integer] Total word count
  # @param metadata [Hash] Additional metadata (optional)
  # @return [DocumentResult] New result indicating success
  #
  # @example
  #   DocumentResult.success(file: "README.md", word_count: 150)
  def self.success(file: nil, word_count: 0, metadata: {})
    new(file: file, errors: [], word_count: word_count, metadata: metadata)
  end

  # Create a failed document result.
  #
  # @param file [String, nil] The file path (optional)
  # @param errors [Array<WordResult>] List of errors
  # @param word_count [Integer] Total word count
  # @param metadata [Hash] Additional metadata (optional)
  # @return [DocumentResult] New result indicating failure
  #
  # @example
  #   errors = [WordResult.incorrect("helo"), WordResult.incorrect("wrold")]
  #   DocumentResult.failure(file: "doc.txt", errors: errors, word_count: 100)
  def self.failure(file: nil, errors: [], word_count: 0, metadata: {})
    new(file: file, errors: errors, word_count: word_count, metadata: metadata)
  end

  # Merge multiple document results.
  #
  # Errors are concatenated in input order and word counts are summed.
  # The merged result has no file and empty metadata. An empty input
  # yields an empty, successful result.
  #
  # @param results [Array<DocumentResult>] Results to merge
  # @return [DocumentResult] Merged result
  #
  # @example Merging results from multiple files
  #   result1 = DocumentResult.new(file: "file1.txt", errors: [e1], word_count: 50)
  #   result2 = DocumentResult.new(file: "file2.txt", errors: [e2, e3], word_count: 75)
  #   DocumentResult.merge([result1, result2])
  #   # => DocumentResult with 3 errors and 125 words
  def self.merge(results)
    new(
      file: nil, # Merged results don't have a single file
      errors: results.flat_map(&:errors),
      word_count: results.sum(&:word_count)
    )
  end
end
|
|
261
|
+
end
|
|
262
|
+
end
|
|
263
|
+
end
|
|
@@ -0,0 +1,203 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "../../../suggestions/suggestion_set"
|
|
4
|
+
|
|
5
|
+
module Kotoshu
|
|
6
|
+
module Models
|
|
7
|
+
module Result
|
|
8
|
+
# Result object for checking a single word.
|
|
9
|
+
#
|
|
10
|
+
# This is a value object that represents the result of checking
|
|
11
|
+
# a single word for spelling errors, including any suggestions.
|
|
12
|
+
#
|
|
13
|
+
# @note This class is immutable and frozen on initialization.
|
|
14
|
+
#
|
|
15
|
+
# @example Creating a correct word result
|
|
16
|
+
# result = WordResult.new("hello", correct: true)
|
|
17
|
+
# result.correct? # => true
|
|
18
|
+
# result.word # => "hello"
|
|
19
|
+
# result.suggestions # => SuggestionSet.empty
|
|
20
|
+
#
|
|
21
|
+
# @example Creating an incorrect word result with suggestions
|
|
22
|
+
# suggestions = SuggestionSet.from_words(%w[hello help], source: :test)
|
|
23
|
+
# result = WordResult.new("helo", correct: false, suggestions: suggestions)
|
|
24
|
+
# result.correct? # => false
|
|
25
|
+
# result.has_suggestions? # => true
|
|
26
|
+
class WordResult
  # @return [String] The word that was checked
  attr_reader :word

  # @return [Boolean] Whether the word is spelled correctly
  attr_reader :correct

  # @return [Suggestions::SuggestionSet] Suggestions for correction
  attr_reader :suggestions

  # @return [Integer, nil] The position of the word in the source text (optional)
  attr_reader :position

  # @return [Hash] Additional metadata (frozen copy)
  attr_reader :metadata

  # Create a new WordResult.
  #
  # The instance is frozen; the word and metadata are stored as frozen
  # copies so the caller's objects stay untouched.
  #
  # @param word [String, nil] The word that was checked (nil becomes "")
  # @param correct [Boolean] Whether the word is correct
  # @param suggestions [Suggestions::SuggestionSet, nil] Suggestions (optional;
  #   nil becomes an empty set)
  # @param position [Integer, nil] Position in source text (optional)
  # @param metadata [Hash, nil] Additional metadata (optional; nil becomes {})
  def initialize(word, correct:, suggestions: nil, position: nil, metadata: {})
    word = "" if word.nil?

    @word = word.dup.freeze
    @correct = correct
    @suggestions = suggestions || Suggestions::SuggestionSet.empty
    @position = position
    # Keep the documented Hash contract even when nil is passed explicitly.
    @metadata = (metadata || {}).dup.freeze

    freeze
  end

  # Check if the word is correct.
  #
  # @return [Boolean] True if the word is spelled correctly
  def correct?
    @correct
  end

  # Check if the word is incorrect.
  #
  # @return [Boolean] True if the word is misspelled
  def incorrect?
    !@correct
  end

  # Check if there are suggestions.
  #
  # @return [Boolean] True if suggestions are available
  def has_suggestions?
    !@suggestions.empty?
  end

  # Get the number of suggestions.
  #
  # @return [Integer] Number of suggestions
  def suggestion_count
    @suggestions.size
  end

  # Get the top N suggestions.
  #
  # @param n [Integer] Number of suggestions to return
  # @return [Array<String>] Top N suggestion words
  def top_suggestions(n = 3)
    @suggestions.top(n).map(&:word)
  end

  # Get the first (best) suggestion.
  #
  # @return [String, nil] The best suggestion or nil
  def first_suggestion
    @suggestions.first&.word
  end

  # Convert to hash.
  #
  # Only the ten best suggestion words are included; +suggestion_count+
  # still reports the full size of the set.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      word: @word,
      correct: @correct,
      position: @position,
      suggestion_count: suggestion_count,
      suggestions: top_suggestions(10),
      metadata: @metadata
    }
  end

  # Convert to JSON-compatible hash.
  #
  # @return [Hash] JSON-compatible hash
  def as_json
    {
      "word" => @word,
      "correct" => @correct,
      "position" => @position,
      "suggestionCount" => suggestion_count,
      "suggestions" => top_suggestions(10),
      "metadata" => @metadata
    }
  end

  # Check equality based on word and correctness.
  #
  # Position, suggestions and metadata do not take part in the comparison.
  #
  # @param other [Object] The other result
  # @return [Boolean] True if equal
  def ==(other)
    return false unless other.is_a?(WordResult)

    @word == other.word && @correct == other.correct
  end
  alias eql? ==

  # Hash based on word and correctness (kept consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    [@word, @correct].hash
  end

  # String representation.
  #
  # @return [String] String representation
  def to_s
    return @word if @correct
    return "#{@word} (no suggestions)" unless has_suggestions?

    "#{@word} (did you mean #{top_suggestions(3).join(", ")}?)"
  end
  alias inspect to_s

  # Create a correct word result.
  #
  # @param word [String] The word
  # @param position [Integer, nil] Position in source (optional)
  # @param metadata [Hash] Additional metadata (optional)
  # @return [WordResult] New result indicating correct spelling
  #
  # @example
  #   WordResult.correct("hello")
  def self.correct(word, position: nil, metadata: {})
    new(word, correct: true, position: position, metadata: metadata)
  end

  # Create an incorrect word result with suggestions.
  #
  # @param word [String] The misspelled word
  # @param suggestions [Suggestions::SuggestionSet, Array<String>, nil] Suggestions;
  #   an Array is wrapped via SuggestionSet.from_words, anything else
  #   results in an empty set
  # @param position [Integer, nil] Position in source (optional)
  # @param metadata [Hash] Additional metadata (optional)
  # @return [WordResult] New result indicating incorrect spelling
  #
  # @example With SuggestionSet
  #   suggestions = SuggestionSet.from_words(%w[hello help], source: :test)
  #   WordResult.incorrect("helo", suggestions: suggestions)
  #
  # @example With array of words
  #   WordResult.incorrect("helo", suggestions: %w[hello help])
  def self.incorrect(word, suggestions: nil, position: nil, metadata: {})
    suggestion_set = case suggestions
                     when Suggestions::SuggestionSet
                       suggestions
                     when Array
                       Suggestions::SuggestionSet.from_words(suggestions, source: :default)
                     else
                       Suggestions::SuggestionSet.empty
                     end

    new(word, correct: false, suggestions: suggestion_set,
              position: position, metadata: metadata)
  end
end
|
|
201
|
+
end
|
|
202
|
+
end
|
|
203
|
+
end
|
|
@@ -0,0 +1,142 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
|
|
4
|
+
module Models
|
|
5
|
+
# Word model representing a dictionary word with metadata.
|
|
6
|
+
#
|
|
7
|
+
# This is a value object that represents a word in the dictionary
|
|
8
|
+
# along with its morphological information (flags and data).
|
|
9
|
+
#
|
|
10
|
+
# @note This class is immutable and frozen on initialization.
|
|
11
|
+
#
|
|
12
|
+
# @example Creating a word
|
|
13
|
+
# word = Models::Word.new("hello", flags: ["noun"], morphological_data: { root: "hell" })
|
|
14
|
+
# word.text # => "hello"
|
|
15
|
+
# word.valid? # => true
|
|
16
|
+
class Word
  # Ordering operators (<, <=, >, >=, between?, clamp) derived from #<=>.
  # #== is defined explicitly below and takes precedence over Comparable's.
  include Comparable

  # @return [String] The word text
  attr_reader :text

  # @return [Array<String>] Morphological flags (e.g., "noun", "verb")
  attr_reader :flags

  # @return [Hash] Additional morphological data
  attr_reader :morphological_data

  # Create a new Word.
  #
  # The instance is frozen; text, flags and morphological data are stored
  # as frozen copies.
  #
  # @param text [String] The word text
  # @param flags [Array<String>] Morphological flags (optional)
  # @param morphological_data [Hash] Additional morphological data (optional)
  # @raise [ArgumentError] If text is nil or empty
  def initialize(text, flags: [], morphological_data: {})
    raise ArgumentError, "Text cannot be empty" if text.nil? || text.empty?

    @text = text.dup.freeze
    @flags = flags.dup.freeze
    @morphological_data = morphological_data.dup.freeze

    freeze
  end

  # Check if the word is valid (has content).
  #
  # The constructor already rejects nil/empty text, so this is true for
  # every instance built through #initialize.
  #
  # @return [Boolean] True if the word is valid
  def valid?
    !@text.empty?
  end

  # Check if the word has a specific flag.
  #
  # @param flag [String] The flag to check
  # @return [Boolean] True if the word has the flag
  def has_flag?(flag)
    @flags.include?(flag)
  end

  # Check if the word has any flags.
  #
  # @return [Boolean] True if the word has flags
  def has_flags?
    !@flags.empty?
  end

  # Get the length of the word.
  #
  # @return [Integer] Word length
  def length
    @text.length
  end

  # Check if the word is empty.
  #
  # @return [Boolean] True if the word is empty
  def empty?
    @text.empty?
  end

  # Convert to string.
  #
  # @return [String] The word text
  def to_s
    @text
  end

  # Convert to hash.
  #
  # @return [Hash] Hash representation
  def to_h
    {
      text: @text,
      flags: @flags,
      morphological_data: @morphological_data
    }
  end

  # Check equality based on text.
  #
  # Only another Word can be equal; a plain String with the same text
  # is not. Flags and morphological data are ignored.
  #
  # @param other [Object] The other object
  # @return [Boolean] True if equal
  def ==(other)
    return false unless other.is_a?(Word)

    @text == other.text
  end
  alias eql? ==

  # Hash based on text (kept consistent with #==).
  #
  # @return [Integer] Hash code
  def hash
    @text.hash
  end

  # Compare words by text.
  #
  # @param other [Word] The other word
  # @return [Integer, nil] Comparison result, or nil if other is not a Word
  def <=>(other)
    return nil unless other.is_a?(Word)

    @text <=> other.text
  end

  # Create a word from a Hunspell dictionary line.
  #
  # Trailing whitespace (including "\n" / "\r\n" left over from reading a
  # file line by line) is removed first so it can leak neither into the
  # text nor into the flag list. Everything after the first "/" is split
  # into single-character flags.
  #
  # @param line [String, nil] Dictionary line (e.g., "hello/flag" or "hello")
  # @return [Word, nil] New word instance, or nil for a nil/blank line
  # @raise [ArgumentError] If the line has no text before the "/"
  #
  # @example
  #   Word.from_dic_line("hello/N")   # => Word with text "hello" and flag "N"
  #   Word.from_dic_line("hello")     # => Word with text "hello" and no flags
  #   Word.from_dic_line("hello/N\n") # => same as "hello/N"
  def self.from_dic_line(line)
    return nil if line.nil?

    entry = line.rstrip
    return nil if entry.empty?

    text, flag_string = entry.split("/", 2)

    new(text, flags: flag_string ? flag_string.chars : [])
  end
end
|
|
141
|
+
end
|
|
142
|
+
end
|