kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,174 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kotoshu
  module Suggestions
    # A single suggestion with associated metadata and behavior.
    # This is MORE model-driven than Spylls which returns plain strings.
    #
    # Instances are frozen at the end of +initialize+. The freeze is shallow:
    # the +metadata+ Hash itself is not frozen.
    #
    # Equality (+==+, +eql?+, +hash+) looks only at the case-insensitive word,
    # whereas ordering (+<=>+) ranks by quality. Two suggestions can therefore
    # be +==+ without comparing as 0.
    class Suggestion
      # Minimum confidence for {#high_confidence?}.
      HIGH_CONFIDENCE_THRESHOLD = 0.8

      # Confidence strictly below this value satisfies {#low_confidence?}.
      LOW_CONFIDENCE_THRESHOLD = 0.5

      # Distances at or beyond this value all receive a distance score of 0.
      MAX_MEANINGFUL_DISTANCE = 5

      attr_reader :word, :distance, :confidence, :source, :metadata

      # @param word [String] The suggested word
      # @param distance [Integer] Edit distance from original (lower is better)
      # @param confidence [Float] Confidence score (0.0 to 1.0, higher is better)
      # @param source [String, Symbol] The strategy that produced this suggestion
      # @param metadata [Hash] Additional metadata about the suggestion; every
      #   keyword argument other than the four above is collected here.
      #   {#<=>} reads the optional +:original_length+ and +:ngram_score+ keys.
      def initialize(word:, distance: 0, confidence: 1.0, source: :unknown, **metadata)
        @word = word
        @distance = distance
        @confidence = confidence
        @source = source
        @metadata = metadata
        freeze
      end

      # Create a suggestion from a simple word (convenience method).
      #
      # @param word [String] The word
      # @param source [String, Symbol] The source
      # @return [Suggestion] New suggestion with distance 0 and confidence 1.0
      def self.from_word(word, source: :unknown)
        new(word: word, distance: 0, confidence: 1.0, source: source)
      end

      # Check if this is a high-confidence suggestion.
      #
      # @return [Boolean] True if confidence >= 0.8
      def high_confidence?
        @confidence >= HIGH_CONFIDENCE_THRESHOLD
      end

      # Check if this is a low-confidence suggestion.
      #
      # @return [Boolean] True if confidence < 0.5
      def low_confidence?
        @confidence < LOW_CONFIDENCE_THRESHOLD
      end

      # Calculate combined score considering distance and confidence.
      #
      # The distance is clamped to 0..MAX_MEANINGFUL_DISTANCE before being
      # normalized, so a negative distance cannot inflate the score and any
      # distance of 5 or more contributes nothing.
      #
      # @param distance_weight [Float] Weight for distance (default: 0.3)
      # @param confidence_weight [Float] Weight for confidence (default: 0.7)
      # @return [Float] Combined score, higher is better. It lies in 0.0..1.0
      #   when confidence is in 0.0..1.0 and the weights are non-negative and
      #   sum to 1.0.
      def combined_score(distance_weight: 0.3, confidence_weight: 0.7)
        capped_distance = @distance.clamp(0, MAX_MEANINGFUL_DISTANCE)
        distance_score = 1.0 - (capped_distance / MAX_MEANINGFUL_DISTANCE.to_f)

        (distance_score * distance_weight) + (@confidence * confidence_weight)
      end

      # Check if this suggestion is the same word as another.
      #
      # @param other [Suggestion, String] The other suggestion or word string
      #   (anything else is converted with +to_s+)
      # @return [Boolean] True if words match (case-insensitive)
      def same_word?(other)
        other_word = other.is_a?(Suggestion) ? other.word : other.to_s
        normalized_word == other_word.downcase
      end

      # Check if this suggestion comes from a specific source.
      #
      # Sources are compared by name, so +:edit+ and +"edit"+ match each other.
      #
      # @param source [String, Symbol] The source to check
      # @return [Boolean] True if this suggestion came from the source
      def from_source?(source)
        @source.to_s == source.to_s
      end

      # Compare suggestions for sorting (higher combined score first).
      #
      # Ranking priority (following CSpell/Hunspell approach):
      # 1. Combined score (higher is better)
      # 2. Edit distance (lower is better)
      # 3. Length similarity (prefer similar length to original word)
      # 4. N-gram similarity (more shared n-grams is better)
      # 5. Alphabetical (ONLY as final tiebreaker)
      #
      # @param other [Suggestion] The other suggestion
      # @return [Integer, nil] -1, 0, or 1; nil when +other+ is not a
      #   Suggestion (the Ruby convention for incomparable operands)
      def <=>(other)
        return nil unless other.is_a?(Suggestion)

        # Operands are swapped wherever "higher is better" so that the better
        # suggestion sorts first.
        by_score = other.combined_score <=> combined_score
        return by_score unless by_score.zero?

        by_distance = @distance <=> other.distance
        return by_distance unless by_distance.zero?

        by_length = length_difference <=> other.length_difference
        return by_length unless by_length.zero?

        by_ngram = other.ngram_score <=> ngram_score
        return by_ngram unless by_ngram.zero?

        normalized_word <=> other.normalized_word
      end

      # Check equality with another suggestion.
      #
      # @param other [Object] The other object
      # @return [Boolean] True if +other+ is a Suggestion for the same word
      #   (case-insensitive); distance, confidence and source are ignored
      def ==(other)
        return false unless other.is_a?(Suggestion)

        normalized_word == other.normalized_word
      end
      alias eql? ==

      # Hash value for use in Hash keys; consistent with {#==}.
      #
      # @return [Integer] Hash code
      def hash
        normalized_word.hash
      end

      # Convert suggestion to hash.
      #
      # Metadata entries are merged last, so a metadata key named
      # +:combined_score+ replaces the computed value.
      #
      # @return [Hash] Suggestion as hash
      def to_h
        {
          word: @word,
          distance: @distance,
          confidence: @confidence,
          source: @source,
          combined_score: combined_score
        }.merge(@metadata)
      end

      # Convert suggestion to JSON-compatible hash.
      #
      # @return [Hash] JSON-compatible hash (same as {#to_h})
      def as_json(*)
        to_h
      end

      # String representation.
      #
      # @return [String] String representation
      def to_s
        "Suggestion(word: '#{@word}', distance: #{@distance}, confidence: #{format("%.2f", @confidence)}, source: #{@source})"
      end

      # Inspect the suggestion.
      #
      # @return [String] Inspection string (same as {#to_s})
      def inspect
        to_s
      end

      protected

      # @return [String] The word folded to lower case for comparisons
      def normalized_word
        @word.downcase
      end

      # @return [Integer] Absolute gap between this word's length and the
      #   original word's length; 0 when +:original_length+ metadata is absent
      def length_difference
        original_length = @metadata[:original_length] || @word.length
        (@word.length - original_length).abs
      end

      # @return [Numeric] Pre-computed +:ngram_score+ metadata, or 0 if absent
      def ngram_score
        @metadata[:ngram_score] || 0
      end
    end
  end
end
|
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "suggestion"
|
|
4
|
+
|
|
5
|
+
module Kotoshu
  module Suggestions
    # A collection of suggestions with rich query methods.
    # This is MORE OOP than Spylls which returns plain iterators of strings.
    #
    # The set is kept ranked (using each element's +<=>+), de-duplicated by
    # case-insensitive word, and truncated to +max_size+ after construction
    # and after every mutation.
    class SuggestionSet
      include Enumerable

      attr_reader :suggestions, :max_size

      # The array passed in is never modified; the set keeps its own ranked
      # copy.
      #
      # @param suggestions [Array<Suggestion>] Initial suggestions
      # @param max_size [Integer] Maximum number of suggestions to keep
      def initialize(suggestions = [], max_size: 10)
        @suggestions = suggestions
        @max_size = max_size
        sort_and_limit!
      end

      # Create an empty suggestion set.
      #
      # @param max_size [Integer] Maximum size
      # @return [SuggestionSet] Empty set
      def self.empty(max_size: 10)
        new([], max_size: max_size)
      end

      # Create a suggestion set from an array of words.
      #
      # @param words [Array<String>] Array of words
      # @param source [String, Symbol] The source
      # @param max_size [Integer] Maximum size
      # @return [SuggestionSet] New set
      def self.from_words(words, source: :unknown, max_size: 10)
        suggestions = words.map { |w| Suggestion.from_word(w, source: source) }
        new(suggestions, max_size: max_size)
      end

      # Add a suggestion to the set.
      #
      # @param suggestion [Suggestion] The suggestion to add
      # @return [SuggestionSet] Self for chaining
      def add(suggestion)
        @suggestions << suggestion
        sort_and_limit!
        self
      end
      alias << add

      # Add multiple suggestions.
      #
      # @param new_suggestions [Array<Suggestion>] Suggestions to add
      # @return [SuggestionSet] Self for chaining
      def concat(new_suggestions)
        @suggestions.concat(new_suggestions)
        sort_and_limit!
        self
      end

      # Merge another suggestion set into this one.
      #
      # @param other [SuggestionSet] The other set
      # @return [SuggestionSet] Self for chaining
      def merge!(other)
        concat(other.suggestions)
      end

      # Get suggestions by source.
      #
      # @param source [String, Symbol] The source to filter by
      # @return [SuggestionSet] New set with filtered suggestions
      def from_source(source)
        SuggestionSet.new(@suggestions.select { |s| s.from_source?(source) }, max_size: @max_size)
      end

      # Get high-confidence suggestions.
      #
      # @return [SuggestionSet] New set with high-confidence suggestions
      def high_confidence
        SuggestionSet.new(@suggestions.select(&:high_confidence?), max_size: @max_size)
      end

      # Get low-confidence suggestions.
      #
      # @return [SuggestionSet] New set with low-confidence suggestions
      def low_confidence
        SuggestionSet.new(@suggestions.select(&:low_confidence?), max_size: @max_size)
      end

      # Get suggestions within a distance range (both bounds inclusive).
      #
      # @param min_distance [Integer] Minimum distance
      # @param max_distance [Integer] Maximum distance
      # @return [SuggestionSet] New set with filtered suggestions
      def within_distance(min_distance: 0, max_distance: 2)
        filtered = @suggestions.select { |s| s.distance.between?(min_distance, max_distance) }
        SuggestionSet.new(filtered, max_size: @max_size)
      end

      # Check if set contains a specific word.
      #
      # Note: this replaces Enumerable#include? and matches by word
      # (via each suggestion's +same_word?+) rather than by +==+.
      #
      # @param word [String] The word to check
      # @return [Boolean] True if word is in suggestions
      def include?(word)
        @suggestions.any? { |s| s.same_word?(word) }
      end
      alias has_word? include?

      # Find a suggestion by word.
      #
      # @param word [String] The word to find
      # @return [Suggestion, nil] The suggestion or nil
      def find_word(word)
        @suggestions.find { |s| s.same_word?(word) }
      end

      # Get the top N suggestions.
      #
      # @param n [Integer] Number of suggestions to get
      # @return [Array<Suggestion>] Top N suggestions
      def top(n)
        @suggestions.first(n)
      end

      # Get the first (best) suggestion, or the first +n+ when a count is
      # given (keeping the Enumerable#first contract intact).
      #
      # @param args [Array<Integer>] Optional count
      # @return [Suggestion, nil, Array<Suggestion>] The best suggestion or
      #   nil; an Array when a count is given
      def first(*args)
        @suggestions.first(*args)
      end

      # Get the last suggestion, or the last +n+ when a count is given.
      #
      # @param args [Array<Integer>] Optional count
      # @return [Suggestion, nil, Array<Suggestion>] The last suggestion or
      #   nil; an Array when a count is given
      def last(*args)
        @suggestions.last(*args)
      end

      # Check if the set is empty.
      #
      # @return [Boolean] True if no suggestions
      def empty?
        @suggestions.empty?
      end

      # Get the number of suggestions.
      #
      # @return [Integer] Number of suggestions
      def size
        @suggestions.size
      end
      alias length size

      # Count suggestions.
      #
      # Without arguments this is the same as {#size}. With an argument or a
      # block it defers to Enumerable#count, so +count(&:high_confidence?)+
      # counts only the matching suggestions.
      #
      # @return [Integer] Number of (matching) suggestions
      def count(*args, &block)
        return size if args.empty? && block.nil?

        super
      end

      # Iterate over suggestions in ranked order.
      #
      # @yield [suggestion] Each suggestion
      # @return [Enumerator, Array<Suggestion>] Enumerator if no block given;
      #   otherwise the underlying array, as returned by Array#each
      def each(&block)
        return enum_for(:each) { size } unless block_given?

        @suggestions.each(&block)
      end

      # Get unique suggestions (by word, case-insensitive).
      #
      # The first occurrence of each word is the one that is kept.
      #
      # @return [SuggestionSet] New set with unique suggestions
      def unique
        SuggestionSet.new(@suggestions.uniq { |s| s.word.downcase }, max_size: @max_size)
      end

      # Convert to array of words.
      #
      # @return [Array<String>] Array of suggestion words
      def to_words
        @suggestions.map(&:word)
      end
      alias words to_words

      # Convert to array of hashes.
      #
      # Note: this replaces Enumerable#to_a; use {#suggestions} to get the
      # suggestion objects themselves.
      #
      # @return [Array<Hash>] Array of suggestion hashes
      def to_a
        @suggestions.map(&:to_h)
      end

      # Convert to JSON-compatible array.
      #
      # @return [Array<Hash>] JSON-compatible array (same as {#to_a})
      def as_json(*)
        to_a
      end

      # String representation.
      #
      # @return [String] String representation
      def to_s
        "SuggestionSet(size: #{size}, max_size: #{@max_size})"
      end

      # Inspect the suggestion set.
      #
      # @return [String] {#to_s}, followed by the bracketed word list when
      #   the set is not empty
      def inspect
        return to_s if @suggestions.empty?

        "#{self} [#{@suggestions.map(&:word).join(", ")}]"
      end

      private

      # Rank suggestions, drop case-insensitive duplicate words (the
      # best-ranked occurrence wins because de-duplication runs after the
      # sort) and keep at most max_size entries.
      #
      # A new array is built each time, so an array handed to the constructor
      # is left untouched and may even be frozen.
      def sort_and_limit!
        @suggestions = @suggestions.sort.uniq { |s| s.word.downcase }.first(@max_size)
      end
    end
  end
end
|