kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,876 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kotoshu
4
+ module Algorithms
5
+ # Main "is this word correct?" algorithm implementation.
6
+ #
7
+ # Ported from Spylls (Python) lookup.py
8
+ #
9
+ # On a bird's-eye view level:
10
+ # * Word correctness check is an attempt to analyze word form
11
+ # (maybe it has this suffix? maybe it has this prefix? maybe it
12
+ # consists of several words?)
13
+ # * The word is considered correct if at least one form is found that
14
+ # has valid suffixes/prefixes from .aff file and valid stem from
15
+ # .dic file, and they are all compatible with each other.
16
+ #
17
+ # To follow algorithm details, start reading from Lookup.call method.
18
+ module Lookup
19
# Matches a string that consists solely of an unsigned integer or decimal
# number ("12", "3.14"). Anchored with \A and \z so the whole string must
# match; ^ and $ are per-line anchors in Ruby and would accept multi-line
# input such as "abc\n123".
NUMBER_REGEXP = /\A\d+(\.\d+)?\z/.freeze
20
+
21
+ # Position of word part in compound word.
22
+ #
23
+ # Used when checking whether a word could be part of a compound
24
+ # (specifically its begin/middle/end).
25
module CompoundPos
  # Symbolic markers for where a word part sits inside a compound:
  # at its beginning, somewhere in the middle, or at its end.
  BEGIN_POS, MIDDLE, END_POS = %i[begin middle end]
end
30
+
31
+ # AffixForm is a hypothesis of how some word might be split into
32
+ # stem, suffixes and prefixes.
33
+ #
34
+ # It always has full text and stem, and may have up to two suffixes
35
+ # and up to two prefixes.
36
+ #
37
+ # The following is always true (considering absent affixes as empty):
38
+ # prefix + prefix2 + stem + suffix2 + suffix = text
39
+ #
40
+ # prefix2/suffix2 are "secondary", so if the word has only one suffix,
41
+ # it is stored in suffix and suffix2 is nil.
42
class AffixForm
  # text          - full word text
  # stem          - word with the affixes removed
  # prefix/suffix - primary affix data (nil when absent)
  # prefix2/suffix2 - secondary affix data (nil when absent)
  # in_dictionary - dictionary entry matched for the stem (nil when absent)
  attr_reader :text, :stem, :prefix, :suffix, :prefix2, :suffix2, :in_dictionary

  def initialize(text, stem, prefix: nil, suffix: nil, prefix2: nil, suffix2: nil, in_dictionary: nil)
    @text = text
    @stem = stem
    @prefix = prefix
    @suffix = suffix
    @prefix2 = prefix2
    @suffix2 = suffix2
    @in_dictionary = in_dictionary
  end

  # Build a new form of the same class, overriding only the given fields.
  #
  # @param changes [Hash] field name => new value; unknown keys are ignored
  # @return [AffixForm] a fresh instance, the receiver is left untouched
  def replace(**changes)
    pick = ->(key, current) { changes.key?(key) ? changes[key] : current }

    self.class.new(
      pick.call(:text, @text),
      pick.call(:stem, @stem),
      prefix: pick.call(:prefix, @prefix),
      suffix: pick.call(:suffix, @suffix),
      prefix2: pick.call(:prefix2, @prefix2),
      suffix2: pick.call(:suffix2, @suffix2),
      in_dictionary: pick.call(:in_dictionary, @in_dictionary)
    )
  end

  # True when a primary prefix or a primary suffix is present.
  #
  # @return [Boolean]
  def has_affixes?
    !(@suffix.nil? && @prefix.nil?)
  end

  # True when neither a primary prefix nor a primary suffix is present.
  #
  # @return [Boolean]
  def is_base?
    @suffix.nil? && @prefix.nil?
  end

  # Union of the :flags of the dictionary entry, the primary prefix and
  # the primary suffix (secondary affixes are not consulted).
  #
  # @return [Set<String>]
  def flags
    collected = Set.new
    [@in_dictionary, @prefix, @suffix].each do |source|
      collected.merge(source[:flags] || []) if source
    end
    collected
  end

  # Affix data that is present, ordered prefix2, prefix, suffix, suffix2.
  #
  # @return [Array<Hash>]
  def all_affixes
    [@prefix2, @prefix, @suffix, @suffix2].reject(&:nil?)
  end

  # Plain text for a base form, otherwise a breakdown of the affixes
  # around the stem.
  #
  # @return [String]
  def to_s
    return @text if is_base?

    pieces = [@prefix, @prefix2].select(&:itself).map(&:inspect)
    pieces << @stem
    pieces.concat([@suffix2, @suffix].select(&:itself).map(&:inspect))

    "AffixForm(#{@text} = #{pieces.join(' + ')})"
  end

  alias_method :inspect, :to_s
end
142
+
143
+ # CompoundForm is a hypothesis of how some word could be split into
144
+ # several AffixForms (word parts with their own stems and possible affixes).
145
+ #
146
+ # Typically, only first part is allowed to have prefix, and only last
147
+ # part is allowed to have suffix, but there are languages where middle
148
+ # parts can have affixes too, specified by special flags.
149
class CompoundForm
  # The word parts, in order, that make up the compound.
  attr_reader :parts

  # @param parts [Array<AffixForm>] parts of the compound word
  def initialize(parts)
    @parts = parts
  end

  # Renders every part with its own #to_s, joined by " + ".
  #
  # @return [String]
  def to_s
    rendered = @parts.map { |part| part.to_s }
    format('CompoundForm(%s)', rendered.join(' + '))
  end

  alias_method :inspect, :to_s
end
166
+
167
+ # Main word correctness lookup class.
168
+ #
169
+ # Typically, you would not use this directly.
170
+ #
171
+ # Example:
172
+ # dictionary = Kotoshu::Dictionary.load('en_US')
173
+ # lookuper = dictionary.lookuper
174
+ #
175
+ # lookuper.call('spylls') # => false
176
+ # lookuper.call('spells') # => true
177
+ #
178
+ # lookuper.good_forms('spells') do |form|
179
+ # puts form
180
+ # end
181
+ # # AffixForm(spells = spells)
182
+ # # AffixForm(spells = spell + Suffix(s: S×, on [[^sxzhy]]$))
183
+ class Lookuper
184
+ # @return [Hash] Aff data structure (from aff file)
185
+ attr_reader :aff
186
+
187
+ # @return [Hash] Dic data structure (from dic file)
188
+ attr_reader :dic
189
+
190
def initialize(aff, dic)
  # Keep both data sources; they are exposed through the aff/dic readers.
  @aff, @dic = aff, dic
end
194
+
195
+ # The outermost word correctness check.
196
+ #
197
+ # Basically, prepares word for check (converting/removing chars), and
198
+ # then checks whether any good word form can be produced with good_forms.
199
+ # If there is none, also tries to break word by break-points.
200
+ #
201
+ # @param word [String] Word to check
202
+ # @param capitalization [Boolean] If false, check only exact capitalization
203
+ # @param allow_nosuggest [Boolean] If false, don't consider NOSUGGEST words as correct
204
+ # @return [Boolean] Whether word is correct
205
def call(word, capitalization: true, allow_nosuggest: true)
  # A word whose every dictionary entry carries FORBIDDENWORD is rejected
  # before any normalisation happens.
  forbidden = @aff[:FORBIDDENWORD]
  return false if forbidden && @dic[:has_flag]&.call(word, forbidden, for_all: true)

  # Normalise the input: ICONV conversion first, then drop IGNORE characters.
  candidate = @aff[:ICONV] ? @aff[:ICONV].call(word) : word
  if (ignored = @aff[:IGNORE])
    candidate = candidate.each_char.reject { |ch| ignored.include?(ch) }.join
  end

  # Plain numbers are accepted without consulting the dictionary.
  return true if NUMBER_REGEXP.match?(candidate)

  accepted = lambda do |piece|
    good_forms(piece, capitalization: capitalization, allow_nosuggest: allow_nosuggest).any?
  end

  # The first breaking is the unbroken word itself; the word is correct as
  # soon as one breaking consists only of empty or accepted pieces.
  break_word(candidate).any? do |pieces|
    pieces.all? { |piece| piece.empty? || accepted.call(piece) }
  end
end
237
+
238
+ # Recursively produce all possible lists of word breaking by break
239
+ # patterns (like dashes).
240
+ #
241
+ # Example: "pre-processed-meat" would produce:
242
+ # ["pre-processed-meat"]
243
+ # ["pre", "processed-meat"]
244
+ # ["pre", "processed", "meat"]
245
+ # ["pre-processed", "meat"]
246
+ #
247
+ # This is necessary because dictionary might contain "pre-processed"
248
+ # as a separate entry.
249
+ #
250
+ # @param text [String] Text to break
251
+ # @param depth [Integer] Current recursion depth
252
+ # @yield [Array<String>] Each possible breaking
253
+ # @return [Enumerator] If no block given
254
def break_word(text, depth = 0)
  # Yields every way of splitting +text+ at the BREAK patterns of the aff
  # data, starting with the unsplit text. Each entry of @aff[:BREAK] is
  # expected to expose a Regexp under :matcher whose first capture group
  # is the separator to cut out.
  return enum_for(:break_word, text, depth) unless block_given?
  # Recursion guard: give up on anything nested deeper than ten levels.
  return if depth > 10

  # The whole text is always the first option.
  yield [text]

  str = text.to_s
  (@aff[:BREAK] || []).each do |pattern|
    pos = 0

    while (match_data = pattern[:matcher].match(str, pos))
      head = str[0...match_data.begin(1)]
      tail = str[match_data.end(1)..]

      # Everything after the separator may be broken further.
      break_word(tail, depth + 1) do |breaking|
        yield [head, *breaking]
      end

      # Continue scanning after this match. A zero-width match would be
      # found again at the same offset forever, so step one character
      # past it to guarantee progress.
      pos = match_data.end(0)
      pos += 1 if match_data.end(0) == match_data.begin(0)
      break if pos >= str.length
    end
  end
end
279
+
280
+ # The main producer of correct word forms.
281
+ #
282
+ # Produces all ways the proposed string might correspond to dictionary/
283
+ # affixes. If there is at least one, the word is correctly spelled.
284
+ #
285
+ # Example:
286
+ # lookuper.good_forms('building') do |form|
287
+ # puts form
288
+ # end
289
+ # # AffixForm(building = building) # noun
290
+ # # AffixForm(building = build + Suffix(ing: G×, on [[^e]]$)) # verb
291
+ #
292
+ # @param word [String] Word to check
293
+ # @param capitalization [Boolean] If false, use only exact capitalization
294
+ # @param allow_nosuggest [Boolean] If false, exclude NOSUGGEST words
295
+ # @param affix_forms [Boolean] If false, only return compound forms
296
+ # @param compound_forms [Boolean] If false, only return affix forms
297
+ # @yield [AffixForm, CompoundForm] Each valid word form
298
def good_forms(word, capitalization: true, allow_nosuggest: true, affix_forms: true, compound_forms: true)
  unless block_given?
    return enum_for(:good_forms, word,
                    capitalization: capitalization,
                    allow_nosuggest: allow_nosuggest,
                    affix_forms: affix_forms,
                    compound_forms: compound_forms)
  end

  # Either every capitalization variant of the word, or only the word as
  # written together with its guessed capitalization type.
  casing = @aff[:casing]
  captype, variants = capitalization ? casing.variants(word) : [casing.guess(word), [word]]

  # German sharp-s special case: when CHECKSHARPS and KEEPCASE are both
  # configured, an all-caps word written with 'ß' is not accepted through
  # a KEEPCASE-flagged form whose stem contains 'ß'.
  skip_form = lambda do |form|
    next false unless @aff[:CHECKSHARPS] && @aff[:KEEPCASE]

    entry = form.in_dictionary
    stem = entry ? entry[:stem] : form.stem
    stem.include?('ß') &&
      captype == Capitalization::Type::ALL &&
      word.include?('ß') &&
      form.flags.include?(@aff[:KEEPCASE])
  end

  variants.each do |variant|
    if affix_forms
      affix_forms_internal(variant, captype: captype, allow_nosuggest: allow_nosuggest) do |form|
        yield form unless skip_form.call(form)
      end
    end

    next unless compound_forms

    compound_forms_internal(variant, captype: captype, allow_nosuggest: allow_nosuggest) do |form|
      yield form
    end
  end
end
343
+
344
+ # Check if the word is correct without yielding forms.
345
+ #
346
+ # Convenience method for simple correctness checks.
347
+ #
348
+ # @param word [String] Word to check
349
+ # @param capitalization [Boolean] Check capitalization variants
350
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
351
+ # @param affix_forms [Boolean] Check affix forms
352
+ # @param compound_forms [Boolean] Check compound forms
353
+ # @return [Boolean] Whether word is correct
354
def correct?(word, capitalization: true, allow_nosuggest: true, affix_forms: true, compound_forms: true)
  # Forward every option untouched; the word is correct as soon as
  # good_forms produces at least one form.
  options = {
    capitalization: capitalization,
    allow_nosuggest: allow_nosuggest,
    affix_forms: affix_forms,
    compound_forms: compound_forms
  }
  good_forms(word, **options).any?
end
365
+
366
+ # Alias for better readability
367
+ alias is_correct? correct?
368
+
369
+ private
370
+
371
+ # Internal affix forms generator.
372
+ #
373
+ # @param word [String] Word to process
374
+ # @param captype [Symbol] Capitalization type
375
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
376
+ # @yield [AffixForm] Each valid affix form
377
def affix_forms_internal(word, captype:, allow_nosuggest:, with_forbidden: false)
  # with_forbidden [Boolean]: when true, the FORBIDDENWORD early exit below
  # is disabled, so forms whose stem carries the forbidden flag are still
  # offered to is_good_form. compound_forms_internal passes
  # `with_forbidden: true` to detect such forms itself; without this
  # keyword that call raised ArgumentError.
  unless block_given?
    return enum_for(:affix_forms_internal, word,
                    captype: captype,
                    allow_nosuggest: allow_nosuggest,
                    with_forbidden: with_forbidden)
  end

  forbidden_flag = @aff[:FORBIDDENWORD]

  produce_affix_forms(word).each do |form|
    # Dictionary entries sharing this stem; nothing to check without any.
    homonyms = @dic[:homonyms]&.call(form.stem) || []
    next if homonyms.empty?

    # An affixed form over a forbidden stem aborts the whole search,
    # unless the caller explicitly asked to see forbidden forms.
    if forbidden_flag && !with_forbidden && form.has_affixes? &&
       homonyms.any? { |h| (h[:flags] || []).include?(forbidden_flag) }
      return
    end

    # Pair the form with every homonym and keep the valid combinations.
    homonyms.each do |homonym|
      candidate = form.replace(in_dictionary: homonym)
      yield candidate if is_good_form(candidate, captype: captype, allow_nosuggest: allow_nosuggest)
    end

    # Special case: with FORCEUCASE and an initial-capital word, also try
    # the lowercased stem.
    next unless captype == Capitalization::Type::INIT && @aff[:FORCEUCASE]

    lower_homonyms = @dic[:homonyms]&.call(form.stem.downcase) || []
    lower_homonyms.each do |homonym|
      candidate = form.replace(in_dictionary: homonym)
      yield candidate if is_good_form(candidate, captype: captype, allow_nosuggest: allow_nosuggest)
    end
  end
end
413
+
414
+ # Internal compound forms generator.
415
+ #
416
+ # @param word [String] Word to process
417
+ # @param captype [Symbol] Capitalization type
418
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
419
+ # @yield [CompoundForm] Each valid compound form
420
def compound_forms_internal(word, captype:, allow_nosuggest:)
  unless block_given?
    return enum_for(:compound_forms_internal, word, captype: captype, allow_nosuggest: allow_nosuggest)
  end

  # No compound is offered when the word already has an affix form that
  # carries the FORBIDDENWORD flag.
  forbidden_flag = @aff[:FORBIDDENWORD]
  if forbidden_flag
    tainted = affix_forms_internal(word,
                                   captype: captype,
                                   allow_nosuggest: allow_nosuggest,
                                   with_forbidden: true)
              .any? { |form| form.flags.include?(forbidden_flag) }
    return if tainted
  end

  # Compounds assembled from per-word compound flags.
  if @aff[:COMPOUNDBEGIN] || @aff[:COMPOUNDFLAG]
    compounds_by_flags(word, captype: captype, allow_nosuggest: allow_nosuggest) do |compound|
      next if is_bad_compound(compound, captype)

      yield compound
    end
  end

  # Compounds assembled from COMPOUNDRULE patterns.
  if @aff[:COMPOUNDRULE]
    compounds_by_rules(word, allow_nosuggest: allow_nosuggest) do |compound|
      next if is_bad_compound(compound, captype)

      yield compound
    end
  end
end
449
+
450
+ # Produce all possible affix forms for a word.
451
+ #
452
+ # @param word [String] Word to process
453
+ # @param compoundpos [Symbol, nil] Position in compound
454
+ # @param prefix_flags [Array<String>] Required prefix flags
455
+ # @param suffix_flags [Array<String>] Required suffix flags
456
+ # @param forbidden_flags [Array<String>] Forbidden affix flags
457
+ # @yield [AffixForm] Each possible affix form
458
def produce_affix_forms(word, compoundpos: nil, prefix_flags: [], suffix_flags: [], forbidden_flags: [])
  unless block_given?
    return enum_for(:produce_affix_forms, word,
                    compoundpos: compoundpos,
                    prefix_flags: prefix_flags,
                    suffix_flags: suffix_flags,
                    forbidden_flags: forbidden_flags)
  end

  # The unmodified word is always the first hypothesis.
  yield AffixForm.new(word, word)

  # Outside a compound both affix kinds may be stripped. Inside one, only
  # the end part may lose a suffix and only the begin part a prefix,
  # unless specific affix flags were requested.
  standalone = compoundpos.nil?
  may_strip_suffix = standalone || compoundpos == CompoundPos::END_POS || !suffix_flags.empty?
  may_strip_prefix = standalone || compoundpos == CompoundPos::BEGIN_POS || !prefix_flags.empty?

  if may_strip_suffix
    desuffix(word, required_flags: suffix_flags, forbidden_flags: forbidden_flags) do |suffixed|
      yield suffixed
    end
  end

  return unless may_strip_prefix

  deprefix(word, required_flags: prefix_flags, forbidden_flags: forbidden_flags) do |prefixed|
    yield prefixed

    # A cross-product prefix may additionally be combined with a
    # cross-product suffix taken off the remaining stem.
    next unless may_strip_suffix && prefixed.prefix && prefixed.prefix[:crossproduct]

    desuffix(prefixed.stem,
             required_flags: suffix_flags,
             forbidden_flags: forbidden_flags,
             crossproduct: true) do |inner|
      yield inner.replace(text: prefixed.text, prefix: prefixed.prefix)
    end
  end
end
500
+
501
+ # Remove suffixes from word.
502
+ #
503
+ # @param word [String] Word to process
504
+ # @param required_flags [Array<String>] Required suffix flags
505
+ # @param forbidden_flags [Array<String>] Forbidden suffix flags
506
+ # @param nested [Boolean] Whether this is a nested call
507
+ # @param crossproduct [Boolean] Whether suffix must have crossproduct
508
+ # @yield [AffixForm] Each form with suffix removed
509
def desuffix(word, required_flags: [], forbidden_flags: [], nested: false, crossproduct: false)
  # Yields an AffixForm for every suffix from @aff[:suffixes_index] that
  # can be taken off the end of +word+, plus (one level deep) the forms
  # obtained by taking a second suffix off the resulting stem.
  unless block_given?
    return enum_for(:desuffix, word,
                    required_flags: required_flags,
                    forbidden_flags: forbidden_flags,
                    nested: nested,
                    crossproduct: crossproduct)
  end

  # The index is keyed by the last character of the word. Read it without
  # `||=` so a lookup never adds keys to (or fails on a frozen) index.
  index = @aff[:suffixes_index] || {}
  candidates = index[word[-1]] || []

  candidates.each do |suffix|
    suffix_flags = suffix[:flags] || []

    # The suffix must be cross-product capable when requested, carry all
    # required flags and none of the forbidden ones.
    next if crossproduct && !suffix[:crossproduct]
    next unless (required_flags - suffix_flags).empty?
    next unless (forbidden_flags & suffix_flags).empty?

    affix = suffix[:affix]
    next unless word.end_with?(affix)

    # Drop the suffix and restore the stripped characters. delete_suffix
    # is also correct for an empty affix, where a `0...-0` range would
    # discard the entire word.
    stem = word.delete_suffix(affix) + suffix[:affix_data][:strip]

    # The condition is only checked when a checker is attached.
    next if suffix[:condition_checker] && !suffix[:condition_checker].matches?(stem)

    yield AffixForm.new(word, stem, suffix: suffix)

    # A second suffix is attempted once; it must carry this suffix's flag.
    next if nested

    desuffix(stem,
             required_flags: [suffix[:flag], *required_flags],
             forbidden_flags: forbidden_flags,
             nested: true,
             crossproduct: crossproduct) do |inner|
      yield inner.replace(suffix2: suffix, text: word)
    end
  end
end
549
+
550
+ # Remove prefixes from word.
551
+ #
552
+ # @param word [String] Word to process
553
+ # @param required_flags [Array<String>] Required prefix flags
554
+ # @param forbidden_flags [Array<String>] Forbidden prefix flags
555
+ # @param nested [Boolean] Whether this is a nested call
556
+ # @yield [AffixForm] Each form with prefix removed
557
def deprefix(word, required_flags: [], forbidden_flags: [], nested: false)
  # Yields an AffixForm for every prefix from @aff[:prefixes_index] that
  # can be taken off the start of +word+; with COMPLEXPREFIXES set, a
  # second prefix is also tried on the resulting stem (one level deep).
  unless block_given?
    return enum_for(:deprefix, word,
                    required_flags: required_flags,
                    forbidden_flags: forbidden_flags,
                    nested: nested)
  end

  # The index is keyed by the first character of the word. Read it
  # without `||=` so a lookup never adds keys to (or fails on a frozen)
  # index.
  index = @aff[:prefixes_index] || {}
  candidates = index[word[0]] || []

  candidates.each do |prefix|
    prefix_flags = prefix[:flags] || []

    # The prefix must carry all required flags and none of the forbidden.
    next unless (required_flags - prefix_flags).empty?
    next unless (forbidden_flags & prefix_flags).empty?

    affix = prefix[:affix]
    next unless word.start_with?(affix)

    # Prefix rules strip characters from the BEGINNING of the stem, so
    # they are restored in front of the remainder, not appended to it.
    stem = prefix[:affix_data][:strip] + word.delete_prefix(affix)

    # The condition is only checked when a checker is attached.
    next if prefix[:condition_checker] && !prefix[:condition_checker].matches?(stem)

    yield AffixForm.new(word, stem, prefix: prefix)

    # A second prefix is attempted once, and only with COMPLEXPREFIXES.
    next if nested || !@aff[:COMPLEXPREFIXES]

    deprefix(stem,
             required_flags: [prefix[:flag], *required_flags],
             forbidden_flags: forbidden_flags,
             nested: true) do |inner|
      yield inner.replace(prefix2: prefix, text: word)
    end
  end
end
593
+
594
+ # Check if an affix form is valid.
595
+ #
596
+ # @param form [AffixForm] Form to check
597
+ # @param captype [Symbol] Original word's capitalization type
598
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
599
+ # @return [Boolean] Whether form is valid
600
def is_good_form(form, captype:, allow_nosuggest:)
  # A form with no dictionary entry behind it can never be valid.
  entry = form.in_dictionary
  return false unless entry

  root_flags = entry[:flags] || []
  all_flags = form.flags

  # NOSUGGEST: reject flagged roots unless the caller allows them.
  return false if !allow_nosuggest && @aff[:NOSUGGEST] && root_flags.include?(@aff[:NOSUGGEST])

  # KEEPCASE: the word's capitalization type must equal the stem's,
  # except for stems containing 'ß' when CHECKSHARPS is set.
  if @aff[:KEEPCASE] && root_flags.include?(@aff[:KEEPCASE])
    stem = entry[:stem]
    if captype != @aff[:casing].guess(stem)
      return false unless @aff[:CHECKSHARPS] && stem.include?('ß')
    end
  end

  # NEEDAFFIX: a flagged root needs at least one affix, and the affixes
  # present must not all carry the flag themselves.
  needaffix = @aff[:NEEDAFFIX]
  if needaffix
    affixed = form.has_affixes?
    return false if root_flags.include?(needaffix) && !affixed
    return false if affixed && form.all_affixes.all? { |affix| (affix[:flags] || []).include?(needaffix) }
  end

  # The flag of each applied prefix/suffix must appear among the form's flags.
  [form.prefix, form.suffix].each do |affix|
    return false if affix && !all_flags.include?(affix[:flag])
  end

  # CIRCUMFIX: the flag must be on both affixes or on neither. Both sides
  # are reduced to strict booleans so that a missing affix (nil) and an
  # affix without the flag (false) compare as equal.
  circumfix = @aff[:CIRCUMFIX]
  if circumfix
    on_suffix = form.suffix ? (form.suffix[:flags] || []).include?(circumfix) : false
    on_prefix = form.prefix ? (form.prefix[:flags] || []).include?(circumfix) : false
    return false if on_suffix != on_prefix
  end

  # No compound position is checked here, so only ONLYINCOMPOUND remains.
  !all_flags.include?(@aff[:ONLYINCOMPOUND])
end
649
+
650
+ # Generate compound forms by flags.
651
+ #
652
+ # @param word_rest [String] Remaining word to process
653
+ # @param captype [Symbol] Capitalization type
654
+ # @param depth [Integer] Current recursion depth
655
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
656
+ # @yield [CompoundForm] Each valid compound form
657
def compounds_by_flags(word_rest, captype:, depth: 0, allow_nosuggest: true)
  # Without a block, hand back an enumerator over the same compounds.
  unless block_given?
    return enum_for(:compounds_by_flags, word_rest,
                    captype: captype,
                    depth: depth,
                    allow_nosuggest: allow_nosuggest)
  end

  aff = @aff
  compound_min = aff[:COMPOUNDMIN] || 3
  compound_word_max = aff[:COMPOUNDWORDMAX]
  permit_flag = aff[:COMPOUNDPERMITFLAG]
  permit_flags = permit_flag ? [permit_flag] : []

  # Single place that forwards the compound settings to the position check.
  fits_position = lambda do |form, pos|
    can_be_at_compound_pos?(form, pos,
                            aff[:COMPOUNDBEGIN], aff[:COMPOUNDMIDDLE], aff[:COMPOUNDEND],
                            aff[:COMPOUNDFLAG], permit_flags)
  end

  # Past the first part, the whole remainder may itself close the compound.
  # The position is passed as the CompoundPos constant so it agrees with
  # the values can_be_at_compound_pos? matches on.
  if depth.positive?
    affix_forms_internal(word_rest, captype: captype, allow_nosuggest: allow_nosuggest) do |form|
      yield CompoundForm.new([form]) if fits_position.call(form, CompoundPos::END_POS)
    end
  end

  # Stop splitting when the remainder cannot hold two parts of minimum
  # length, or when the part limit has been reached.
  return if word_rest.length < compound_min * 2
  return if compound_word_max && depth >= compound_word_max

  compoundpos = depth.zero? ? CompoundPos::BEGIN_POS : CompoundPos::MIDDLE

  # Try every split that leaves at least compound_min characters on each side.
  (compound_min..(word_rest.length - compound_min)).each do |pos|
    beg = word_rest[0...pos]
    rest = word_rest[pos..]

    affix_forms_internal(beg, captype: captype, allow_nosuggest: allow_nosuggest) do |form|
      next unless fits_position.call(form, compoundpos)

      compounds_by_flags(rest, captype: captype, depth: depth + 1, allow_nosuggest: allow_nosuggest) do |partial|
        yield CompoundForm.new([form, *partial.parts])
      end
    end

    # SIMPLIFIEDTRIPLE: when both sides meet on the same character, also try
    # the left part with that character doubled, but report it under the
    # text that actually appears in the word.
    next unless aff[:SIMPLIFIEDTRIPLE] && !beg.empty? && !rest.empty? && beg[-1] == rest[0]

    affix_forms_internal(beg + beg[-1], captype: captype, allow_nosuggest: allow_nosuggest) do |form|
      next unless fits_position.call(form, compoundpos)

      compounds_by_flags(rest, captype: captype, depth: depth + 1, allow_nosuggest: allow_nosuggest) do |partial|
        yield CompoundForm.new([form.replace(text: beg), *partial.parts])
      end
    end
  end
end
720
+
721
+ # Generate compound forms by rules.
722
+ #
723
+ # @param word_rest [String] Remaining word to process
724
+ # @param prev_parts [Array<Hash>] Previously processed parts
725
+ # @param rules [Array<Hash>] Valid compound rules
726
+ # @param allow_nosuggest [Boolean] Include NOSUGGEST words
727
+ # @yield [CompoundForm] Each valid compound form
728
def compounds_by_rules(word_rest, prev_parts: [], rules: nil, allow_nosuggest: true)
  # Without a block, hand back an enumerator over the same compounds.
  unless block_given?
    return enum_for(:compounds_by_rules, word_rest,
                    prev_parts: prev_parts,
                    rules: rules,
                    allow_nosuggest: allow_nosuggest)
  end

  aff = @aff
  compound_min = aff[:COMPOUNDMIN] || 3
  compound_word_max = aff[:COMPOUNDWORDMAX]

  # Rules still in play: the caller's narrowed set, or every COMPOUNDRULE
  # on the outermost call. All matching below goes through this list so
  # that a supplied `rules:` is actually honoured.
  rules ||= aff[:COMPOUNDRULE] || []

  homonyms_of = ->(text) { @dic[:homonyms]&.call(text) || [] }
  flag_sets_for = ->(homonym) { [*prev_parts, homonym].map { |part| part[:flags] || [] } }

  # With earlier parts present, the remainder may be the final part: it
  # closes the compound when some rule fully matches the flag sequence.
  if prev_parts.any?
    homonyms_of.call(word_rest).each do |homonym|
      flag_sets = flag_sets_for.call(homonym)
      next unless rules.any? { |rule| rule[:full_match]&.call(flag_sets) }

      yield CompoundForm.new([AffixForm.new(word_rest, word_rest)])
    end
  end

  # Stop splitting when the remainder cannot hold two parts of minimum
  # length, or when the part limit has been reached.
  return if word_rest.length < compound_min * 2
  return if compound_word_max && prev_parts.length >= compound_word_max

  # Try every split that leaves at least compound_min characters on each side.
  (compound_min..(word_rest.length - compound_min)).each do |pos|
    beg = word_rest[0...pos]

    homonyms_of.call(beg).each do |homonym|
      flag_sets = flag_sets_for.call(homonym)

      # Only rules that can still match the sequence so far are carried deeper.
      matching_rules = rules.select { |rule| rule[:partial_match]&.call(flag_sets) }
      next if matching_rules.empty?

      compounds_by_rules(word_rest[pos..],
                         prev_parts: [*prev_parts, homonym],
                         rules: matching_rules,
                         allow_nosuggest: allow_nosuggest) do |partial|
        yield CompoundForm.new([AffixForm.new(beg, beg), *partial.parts])
      end
    end
  end
end
776
+
777
+ # Check if form can be at specified compound position.
778
+ #
779
+ # @param form [AffixForm] Form to check
780
+ # @param pos [Symbol] Compound position
781
+ # @return [Boolean]
782
def can_be_at_compound_pos?(form, pos, compound_begin, compound_middle, compound_end, compound_flag, permit_flags)
  carried = form.flags

  # The general compound flag admits the form at any position.
  return true if compound_flag && carried.include?(compound_flag)

  # Evaluated lazily: only consulted when the position-specific flag is absent.
  permitted = -> { permit_flags.any? { |flag| carried.include?(flag) } }

  case pos
  when CompoundPos::BEGIN_POS then carried.include?(compound_begin)
  when CompoundPos::MIDDLE then carried.include?(compound_middle) || permitted.call
  when CompoundPos::END_POS then carried.include?(compound_end) || permitted.call
  else false
  end
end
798
+
799
+ # Check if compound form has any issues.
800
+ #
801
+ # @param compound [CompoundForm] Compound to check
802
+ # @param captype [Symbol] Capitalization type
803
+ # @return [Boolean] Whether compound is bad
804
def is_bad_compound(compound, captype)
  aff = @aff
  parts = compound.parts

  # FORCEUCASE: unless the word is all-caps or initial-capital, a last
  # part carrying the flag makes the compound bad. A compound with no
  # parts has nothing to check.
  if aff[:FORCEUCASE] && ![Capitalization::Type::ALL, Capitalization::Type::INIT].include?(captype)
    closing = parts.last
    return true if closing && @dic[:has_flag]&.call(closing.text, aff[:FORCEUCASE])
  end

  # True only for exactly three identical characters; a shorter sequence
  # (very short or empty part text) can never be a triple.
  triple = ->(chars) { chars.length == 3 && chars.uniq.length == 1 }
  final_pair = parts.length - 2

  # Examine each adjacent (left, right) pair of parts in order.
  parts.each_cons(2).with_index do |(left_paradigm, right_paradigm), idx|
    left = left_paradigm.text
    right = right_paradigm.text

    # COMPOUNDFORBIDFLAG is only looked up on the left member of a pair.
    return true if aff[:COMPOUNDFORBIDFLAG] && @dic[:has_flag]&.call(left, aff[:COMPOUNDFORBIDFLAG])

    # "left right" resolving as a single spelling makes the compound bad.
    return true if @affix_forms&.call("#{left} #{right}", captype: captype)&.any?

    # CHECKCOMPOUNDREP: a REP replacement of the joined pair that resolves
    # to a known form makes the compound bad.
    if aff[:CHECKCOMPOUNDREP] && aff[:REP]
      Kotoshu::Algorithms::Permutations.replchars(left + right, aff[:REP]) do |candidate|
        return true if candidate.is_a?(String) && @affix_forms&.call(candidate, captype: captype)&.any?
      end
    end

    # CHECKCOMPOUNDTRIPLE: three identical letters across the boundary,
    # taken as two from the left plus one from the right, or the reverse.
    if aff[:CHECKCOMPOUNDTRIPLE]
      left_chars = left.chars
      right_chars = right.chars
      return true if triple.call(left_chars.last(2) + right_chars.first(1)) ||
                     triple.call(left_chars.last(1) + right_chars.first(2))
    end

    # CHECKCOMPOUNDCASE: either boundary character equal to its own upcase
    # form, unless one of them is a hyphen.
    if aff[:CHECKCOMPOUNDCASE]
      right_c = right[0]
      left_c = left[-1]
      if right_c && left_c && right_c != '-' && left_c != '-' &&
         (right_c == right_c.upcase || left_c == left_c.upcase)
        return true
      end
    end

    # CHECKCOMPOUNDPATTERN: any pattern matching the two parts is enough.
    if aff[:CHECKCOMPOUNDPATTERN]&.any? { |pattern| pattern[:match]&.call(left_paradigm, right_paradigm) }
      return true
    end

    # CHECKCOMPOUNDDUP: duplication is only rejected for the final pair.
    return true if aff[:CHECKCOMPOUNDDUP] && left == right && idx == final_pair
  end

  false
end
873
+ end
874
+ end
875
+ end
876
+ end