kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,246 @@
1
+ # German confusion sets
2
+ # Line format: see en/confusion_sets.txt
3
+ # Run ConfusionSetLoaderTest.testConfusionSetSpelling after adding words to spell check all entries.
4
+
5
+ Genre -> Gerne; 1000; # p=1.000, r=0.588, f0.5=0.877, s=1.000, 1906+51, 3grams, 2021-11-08, fp=0, fn=21, tp=30, tn=1906, {wikipedia=2002}, {wikipedia=2002}
6
+ Hafen -> Harfen; 100; # p=1.000, r=0.198, f0.5=0.552, 862+506, 3grams, 2020-03-22
7
+ #Harfen -> Hafen; 10000000; # p=0.953, r=0.329, f0.5=0.691, 506+862, 3grams, 2020-03-22
8
+ Gbit -> Gibt; 100; # p=1.000, r=1.000, f0.5=1.000, 36+137, 3grams, 2020-02-01
9
+ Gibt -> Gbit; 100; # p=1.000, r=0.861, f0.5=0.969, 137+36, 3grams, 2020-02-01
10
+ #ach -> auch; 10000000 # p=0.998 r=0.746 f0.5=0.935 25+889 2020-03-16 f0.1=0.99 FA/10k=0.040 (commented due to sample size < 30)
11
+ #als -> also; 10 # p=0.989 r=0.438 f0.5=0.791 833+858 2020-03-16 f0.1=0.98 FA/10k=0.292 (commented due to precision < 0.99)
12
+ #also -> als; 10000000 # p=0.980 r=0.288 f0.5=0.662 858+833 2020-03-16 f0.1=0.96 FA/10k=0.532 (commented due to precision < 0.99)
13
+ #De -> Die; 10000000 # p=0.998 r=0.692 f0.5=0.917 114+734 2020-03-16 f0.1=0.99 FA/10k=0.047 (commented due to estimated false alarms rate > 0.025)
14
+ #Die -> De; 100 # p=1.000 r=0.491 f0.5=0.828 734+114 2020-03-16 f0.1=0.99 FA/10k=0.000
15
+ #dich -> doch; 1000; # p=0.963, r=0.830, f0.5=0.933, 918+827, 3grams, 2020-03-16
16
+ #dich -> doch; 10000; # p=0.976, r=0.750, f0.5=0.921, 918+827, 3grams, 2020-03-16
17
+ #dich -> doch; 100000; # p=0.983, r=0.681, f0.5=0.903, 918+827, 3grams, 2020-03-16
18
+ #dich -> doch; 1000000; # p=0.992, r=0.571, f0.5=0.864, 918+827, 3grams, 2020-03-16
19
+ #dich -> doch; 10000000; # p=0.995, r=0.486, f0.5=0.823, 918+827, 3grams, 2020-03-16
20
+ #dich -> doch; 1000000000; # p=0.992, r=0.310, f0.5=0.689, s=0.998, 918+827, 3grams, 2020-09-04
21
+ #doch -> dich; 1000 # p=1.000 r=0.558 f0.5=0.863 827+918 2020-03-16 f0.1=0.99 FA/10k=0.000
22
+ #dich -> dir; 10000 # p=0.997 r=0.621 f0.5=0.889 888+932 2020-03-16 f0.1=0.99 FA/10k=0.001
23
+ #dir -> dich; 1000 # p=0.998 r=0.590 f0.5=0.877 932+888 2020-03-16 f0.1=0.99 FA/10k=0.001
24
+ #drei -> frei; 100000 # p=1.000 r=0.449 f0.5=0.803 750+855 2020-03-16 f0.1=0.99 FA/10k=0.000
25
+ #frei -> drei; 100000 # p=1.000 r=0.741 f0.5=0.935 855+750 2020-03-16 f0.1=1.00 FA/10k=0.000
26
+ #ehelich -> ehrlich; 10 # p=1.000 r=0.942 f0.5=0.988 8+191 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
27
+ #ehrlich -> ehelich; 100 # p=1.000 r=0.250 f0.5=0.625 191+8 2020-03-16 f0.1=0.97 FA/10k=0.000 (commented due to sample size < 30)
28
+ fielen -> vielen; 1000000 # p=0.997 r=0.819 f0.5=0.956 896+891 2020-03-16 f0.1=0.99 FA/10k=0.002
29
+ #vielen -> fielen; 10 # p=0.996 r=0.916 f0.5=0.979 891+896 2020-03-16 f0.1=1.00 FA/10k=0.003
30
+ #fiele -> viele; 100 # p=0.999 r=0.941 f0.5=0.986 13+710 2020-03-16 f0.1=1.00 FA/10k=0.001 (commented due to sample size < 30)
31
+ #viele -> fiele; 1000 # p=1.000 r=0.154 f0.5=0.476 710+13 2020-03-16 f0.1=0.95 FA/10k=0.000 (commented due to sample size < 30)
32
+ fiel -> viel; 10000 # p=0.996 r=0.888 f0.5=0.973 986+947 2020-03-16 f0.1=0.99 FA/10k=0.006
33
+ viel -> fiel; 10 # p=0.999 r=0.743 f0.5=0.934 947+986 2020-03-16 f0.1=1.00 FA/10k=0.002
34
+ #Gebäck -> Gepäck; 100000 # p=1.000 r=0.414 f0.5=0.779 52+133 2020-03-16 f0.1=0.99 FA/10k=0.000
35
+ Gepäck -> Gebäck; 100 # p=1.000 r=0.250 f0.5=0.625 133+52 2020-03-16 f0.1=0.97 FA/10k=0.000
36
+ #Gerten -> Gärten; 10 # p=1.000 r=0.903 f0.5=0.979 2+536 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
37
+ #gesellt -> gestellt; 10 # p=1.000 r=0.883 f0.5=0.974 11+990 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
38
+ #gestellt -> gesellt; 100 # p=1.000 r=1.000 f0.5=1.000 990+11 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
39
+ #Hefe -> Hefte; 1000 # p=1.000 r=0.495 f0.5=0.830 89+190 2020-03-16 f0.1=0.99 FA/10k=0.000
40
+ #Hefte -> Hefe; 100 # p=1.000 r=0.348 f0.5=0.728 190+89 2020-03-16 f0.1=0.98 FA/10k=0.000
41
+ #häute -> heute; 10 # p=1.000 r=0.820 f0.5=0.958 0+721 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
42
+ #ihm -> im; 10; # p=0.996, r=0.960, f0.5=0.989, 903+825, 3grams, 2020-03-16#
43
+ #ihm -> im; 100; # p=1.000, r=0.949, f0.5=0.989, 903+825, 3grams, 2020-03-16
44
+ #ihm -> im; 1000; # p=1.000, r=0.933, f0.5=0.986, 903+825, 3grams, 2020-03-16
45
+ #ihm -> im; 10000; # p=1.000, r=0.898, f0.5=0.978, 903+825, 3grams, 2020-03-16
46
+ ihm -> im; 100000; # p=1.000, r=0.863, f0.5=0.969, 903+825, 3grams, 2020-03-16
47
+ #ihm -> im; 1000000; # p=1.000, r=0.833, f0.5=0.961, 903+825, 3grams, 2020-03-16
48
+ #ihm -> im; 10000000; # p=1.000, r=0.772, f0.5=0.944, 903+825, 3grams, 2020-03-16
49
+ #im -> ihm; 100 # p=0.999 r=0.956 f0.5=0.990 825+903 2020-03-16 f0.1=1.00 FA/10k=0.028 (commented due to estimated false alarms rate > 0.025)
50
+ #im -> um; 10; # p=0.991, r=0.816, f0.5=0.950, 821+669, 3grams, 2020-03-16
51
+ #im -> um; 100; # p=1.000, r=0.753, f0.5=0.939, 821+669, 3grams, 2020-03-16
52
+ #im -> um; 1000; # p=1.000, r=0.682, f0.5=0.915, 821+669, 3grams, 2020-03-16
53
+ #im -> um; 10000; # p=1.000, r=0.596, f0.5=0.881, 821+669, 3grams, 2020-03-16
54
+ im -> um; 100000; # p=1.000, r=0.487, f0.5=0.826, 821+669, 3grams, 2020-03-16
55
+ #im -> um; 1000000; # p=1.000, r=0.419, f0.5=0.783, 821+669, 3grams, 2020-03-16
56
+ #im -> um; 10000000; # p=1.000, r=0.353, f0.5=0.732, 821+669, 3grams, 2020-03-16
57
+ #um -> im; 1000 # p=0.997 r=0.864 f0.5=0.967 669+821 2020-03-16 f0.1=1.00 FA/10k=0.100 (commented due to estimated false alarms rate > 0.025)
58
+ #klingt -> klinkt; 1000 # p=1.000 r=1.000 f0.5=1.000 294+1 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
59
+ #klinkt -> klingt; 10 # p=1.000 r=0.908 f0.5=0.980 1+294 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
60
+ #Laib -> Leib; 100000 # p=0.985 r=0.744 f0.5=0.925 20+258 2020-03-16 f0.1=0.98 FA/10k=0.002 (commented due to precision < 0.99)
61
+ #Leib -> Laib; 10 # p=1.000 r=0.500 f0.5=0.833 258+20 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
62
+ #Laie -> Leihe; 10 # p=1.000 r=0.429 f0.5=0.789 36+7 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
63
+ #Leihe -> Laie; 100 # p=1.000 r=0.694 f0.5=0.919 7+36 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
64
+ #least -> liest; 10 # p=1.000 r=0.812 f0.5=0.956 25+356 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
65
+ #liest -> least; 10 # p=1.000 r=0.240 f0.5=0.612 356+25 2020-03-16 f0.1=0.97 FA/10k=0.000 (commented due to sample size < 30)
66
+ Leere -> Lehre; 100000 # p=0.998 r=0.662 f0.5=0.906 85+942 2020-03-16 f0.1=0.99 FA/10k=0.001
67
+ Lehre -> Leere; 10000 # p=1.000 r=0.329 f0.5=0.711 942+85 2020-03-16 f0.1=0.98 FA/10k=0.000
68
+ #legen -> lägen; 100 # p=1.000 r=0.292 f0.5=0.673 716+24 2020-03-16 f0.1=0.98 FA/10k=0.000 (commented due to sample size < 30)
69
+ #lägen -> legen; 10000 # p=0.998 r=0.585 f0.5=0.874 24+716 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
70
+ #Lid -> Lied; 10 # p=0.999 r=0.825 f0.5=0.958 5+960 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
71
+ #Maar -> Mahr; 100 # p=1.000 r=0.125 f0.5=0.417 75+16 2020-03-16 f0.1=0.94 FA/10k=0.000 (commented due to sample size < 30)
72
+ #Mahr -> Maar; 10 # p=1.000 r=0.573 f0.5=0.870 16+75 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
73
+ #Mahl -> Mal; 100000 # p=0.976 r=0.610 f0.5=0.871 41+264 2020-03-16 f0.1=0.97 FA/10k=0.006 (commented due to precision < 0.99)
74
+ #Mal -> Mahl; 1000 # p=1.000 r=0.366 f0.5=0.743 264+41 2020-03-16 f0.1=0.98 FA/10k=0.000
75
+ Mark -> Markt; 1000 # p=0.993 r=0.715 f0.5=0.921 866+954 2020-03-16 f0.1=0.99 FA/10k=0.004
76
+ Markt -> Mark; 10000 # p=1.000 r=0.305 f0.5=0.687 954+866 2020-03-16 f0.1=0.98 FA/10k=0.000
77
+ #Mathe -> Matte; 10000 # p=1.000 r=0.370 f0.5=0.746 52+27 2020-03-16 f0.1=0.98 FA/10k=0.000 (commented due to sample size < 30)
78
+ #Matte -> Mathe; 10 # p=1.000 r=0.365 f0.5=0.742 27+52 2020-03-16 f0.1=0.98 FA/10k=0.000 (commented due to sample size < 30)
79
+ Mediation -> Meditation; 10000 # p=1.000 r=0.211 f0.5=0.573 71+175 2020-03-16 f0.1=0.96 FA/10k=0.000
80
+ #mich -> mir; 10; # p=0.981, r=0.945, f0.5=0.973, 908+811, 3grams, 2020-03-16
81
+ #mich -> mir; 100; # p=0.990, r=0.896, f0.5=0.970, 908+811, 3grams, 2020-03-16
82
+ #mich -> mir; 1000; # p=0.994, r=0.815, f0.5=0.952, 908+811, 3grams, 2020-03-16
83
+ #mich -> mir; 10000; # p=0.998, r=0.725, f0.5=0.928, 908+811, 3grams, 2020-03-16
84
+ #mich -> mir; 100000; # p=0.998, r=0.644, f0.5=0.899, 908+811, 3grams, 2020-03-16
85
+ #mich -> mir; 1000000; # p=0.998, r=0.539, f0.5=0.853, 908+811, 3grams, 2020-03-16
86
+ #mich -> mir; 10000000; # p=0.997, r=0.440, f0.5=0.796, 908+811, 3grams, 2020-03-16
87
+ #mir -> mich; 10; # p=0.991, r=0.858, f0.5=0.961, 811+908, 3grams, 2020-03-16
88
+ #mir -> mich; 100; # p=1.000, r=0.780, f0.5=0.947, 811+908, 3grams, 2020-03-16
89
+ #mir -> mich; 1000; # p=1.000, r=0.671, f0.5=0.911, 811+908, 3grams, 2020-03-16
90
+ #mir -> mich; 10000; # p=1.000, r=0.566, f0.5=0.867, 811+908, 3grams, 2020-03-16
91
+ #mir -> mich; 100000; # p=1.000, r=0.430, f0.5=0.790, 811+908, 3grams, 2020-03-16
92
+ #mir -> mich; 1000000; # p=1.000, r=0.298, f0.5=0.680, 811+908, 3grams, 2020-03-16
93
+ #mir -> mich; 10000000; # p=1.000, r=0.193, f0.5=0.544, 811+908, 3grams, 2020-03-16
94
+ #Miene -> Mine; 10 # p=1.000 r=0.183 f0.5=0.528 35+126 2020-03-16 f0.1=0.96 FA/10k=0.000
95
+ Mine -> Miene; 1000 # p=1.000 r=0.829 f0.5=0.960 126+35 2020-03-16 f0.1=1.00 FA/10k=0.000
96
+ #mir -> mit; 1000 # p=0.996 r=0.807 f0.5=0.951 839+830 2020-03-16 f0.1=0.99 FA/10k=0.117 (commented due to estimated false alarms rate > 0.025)
97
+ #mir -> mit; 15000000; # p=0.998, r=0.647, f0.5=0.900, 839+830, 3grams, 2020-03-26
98
+ #mit -> mir; 10; # p=0.996, r=0.917, f0.5=0.979, 830+839, 3grams, 2020-03-16
99
+ #mit -> mir; 100; # p=1.000, r=0.877, f0.5=0.973, 830+839, 3grams, 2020-03-16
100
+ #mit -> mir; 1000; # p=1.000, r=0.836, f0.5=0.962, 830+839, 3grams, 2020-03-16
101
+ #mit -> mir; 10000; # p=1.000, r=0.756, f0.5=0.939, 830+839, 3grams, 2020-03-16
102
+ #mit -> mir; 100000; # p=1.000, r=0.682, f0.5=0.915, 830+839, 3grams, 2020-03-16
103
+ #mit -> mir; 1000000; # p=1.000, r=0.615, f0.5=0.889, 830+839, 3grams, 2020-03-16
104
+ mit -> mir; 40000000; # p=1.000, r=0.521, f0.5=0.845, 830+839, 3grams, 2020-03-16
105
+ Motte -> Motto; 1000 # p=0.997 r=0.896 f0.5=0.975 70+365 2020-03-16 f0.1=1.00 FA/10k=0.000
106
+ Motto -> Motte; 10 # p=1.000 r=0.629 f0.5=0.894 365+70 2020-03-16 f0.1=0.99 FA/10k=0.000
107
+ #Nachnahme -> Nachname; 100 # p=0.981 r=0.449 f0.5=0.793 5+118 2020-03-16 f0.1=0.97 FA/10k=0.000 (commented due to precision < 0.99)
108
+ #Nachname -> Nachnahme; 100 # p=1.000 r=0.400 f0.5=0.769 118+5 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
109
+ #oll -> soll; 10 # p=0.999 r=0.937 f0.5=0.986 5+858 2020-03-16 f0.1=1.00 FA/10k=0.002 (commented due to sample size < 30)
110
+ Patenten -> Patienten; 100000 # p=0.993 r=0.754 f0.5=0.934 36+974 2020-03-16 f0.1=0.99 FA/10k=0.002
111
+ #Rede -> Reede; 10 # p=1.000 r=0.194 f0.5=0.545 913+31 2020-03-16 f0.1=0.96 FA/10k=0.000
112
+ Reede -> Rede; 100000 # p=0.993 r=0.804 f0.5=0.949 31+913 2020-03-16 f0.1=0.99 FA/10k=0.004
113
+ #Reinfall -> Rheinfall; 10 # p=1.000 r=0.839 f0.5=0.963 10+31 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
114
+ #Rheinfall -> Reinfall; 10 # p=1.000 r=1.000 f0.5=1.000 31+10 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
115
+ Rezension -> Rezession; 1000 # p=1.000 r=0.312 f0.5=0.694 379+77 2020-03-16 f0.1=0.98 FA/10k=0.000
116
+ Rezession -> Rezension; 1000 # p=1.000 r=0.612 f0.5=0.888 77+379 2020-03-16 f0.1=0.99 FA/10k=0.000
117
+ #Route -> Rute; 10 # p=1.000 r=0.406 f0.5=0.774 488+32 2020-03-16 f0.1=0.99 FA/10k=0.000
118
+ Rute -> Route; 100 # p=1.000 r=0.398 f0.5=0.767 32+488 2020-03-16 f0.1=0.99 FA/10k=0.000
119
+ #Rum -> Ruhm; 10000000 # p=0.990 r=0.338 f0.5=0.714 179+296 2020-03-16 f0.1=0.97 FA/10k=0.001
120
+ scherzhaft -> schmerzhaft; 10000 # p=1.000 r=0.564 f0.5=0.866 77+39 2020-03-16 f0.1=0.99 FA/10k=0.000
121
+ schmerzhaft -> scherzhaft; 10 # p=1.000 r=0.273 f0.5=0.652 39+77 2020-03-16 f0.1=0.97 FA/10k=0.000
122
+ #Sand -> Stand; 1000000 # p=0.993 r=0.605 f0.5=0.880 684+473 2020-03-16 f0.1=0.99 FA/10k=0.003
123
+ #Stand -> Sand; 1000 # p=1.000 r=0.370 f0.5=0.746 473+684 2020-03-16 f0.1=0.98 FA/10k=0.000
124
+ #sehr -> seht; 100 # p=1.000 r=0.314 f0.5=0.696 950+35 2020-03-16 f0.1=0.98 FA/10k=0.000
125
+ seht -> sehr; 100000 # p=0.999 r=0.913 f0.5=0.980 35+950 2020-03-16 f0.1=1.00 FA/10k=0.003
126
+ seht -> sieht; 1000 # p=0.994 r=0.877 f0.5=0.968 34+964 2020-03-16 f0.1=0.99 FA/10k=0.004
127
+ sieht -> seht; 100 # p=1.000 r=0.382 f0.5=0.756 964+34 2020-03-16 f0.1=0.98 FA/10k=0.000
128
+ #Sigel -> Siegel; 10000 # p=1.000 r=0.455 f0.5=0.807 11+442 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
129
+ Sohle -> Sole; 10 # p=1.000 r=0.160 f0.5=0.488 60+75 2020-03-16 f0.1=0.95 FA/10k=0.000
130
+ Sole -> Sohle; 1000 # p=1.000 r=0.483 f0.5=0.824 75+60 2020-03-16 f0.1=0.99 FA/10k=0.000
131
+ #späht -> spät; 10 # p=0.999 r=0.981 f0.5=0.995 2+969 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
132
+ #staatlichen -> stattlichen; 100 # p=1.000 r=0.235 f0.5=0.606 798+17 2020-03-16 f0.1=0.97 FA/10k=0.000 (commented due to sample size < 30)
133
+ #stattlichen -> staatlichen; 100 # p=0.996 r=0.896 f0.5=0.974 17+798 2020-03-16 f0.1=0.99 FA/10k=0.002 (commented due to sample size < 30)
134
+ #staatlicher -> stattlicher; 100 # p=1.000 r=0.600 f0.5=0.882 242+5 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
135
+ #stattlicher -> staatlicher; 10 # p=1.000 r=0.876 f0.5=0.972 5+242 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
136
+ #staatliches -> stattliches; 100 # p=1.000 r=0.571 f0.5=0.870 62+7 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
137
+ #stattliches -> staatliches; 10 # p=1.000 r=0.758 f0.5=0.940 7+62 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
138
+ staatliche -> stattliche; 10000000 # p=1.000 r=0.100 f0.5=0.357 555+30 2020-03-16 f0.1=0.92 FA/10k=0.000
139
+ stattliche -> staatliche; 10000 # p=0.997 r=0.613 f0.5=0.886 30+555 2020-03-16 f0.1=0.99 FA/10k=0.001
140
+ #staatlich -> stattlich; 100 # p=1.000 r=0.667 f0.5=0.909 394+3 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
141
+ #stattlich -> staatlich; 1000 # p=1.000 r=0.805 f0.5=0.954 3+394 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
142
+ #Stand -> Strand; 1000 # p=1.000 r=0.355 f0.5=0.733 473+558 2020-03-16 f0.1=0.98 FA/10k=0.000
143
+ #Strand -> Stand; 10000000 # p=0.990 r=0.611 f0.5=0.881 558+473 2020-03-16 f0.1=0.98 FA/10k=0.003
144
+ #Stelle -> Ställe; 10 # p=1.000 r=0.261 f0.5=0.638 886+23 2020-03-16 f0.1=0.97 FA/10k=0.000 (commented due to sample size < 30)
145
+ #Ställe -> Stelle; 100000 # p=0.994 r=0.898 f0.5=0.973 23+886 2020-03-16 f0.1=0.99 FA/10k=0.006 (commented due to sample size < 30)
146
+ #Stele -> Stelle; 10000000 # p=0.966 r=0.800 f0.5=0.928 93+887 2020-03-16 f0.1=0.96 FA/10k=0.034 (commented due to precision < 0.99)
147
+ Stich -> Strich; 100000 # p=1.000 r=0.301 f0.5=0.683 260+206 2020-03-16 f0.1=0.98 FA/10k=0.000
148
+ Strich -> Stich; 1000 # p=1.000 r=0.358 f0.5=0.736 206+260 2020-03-16 f0.1=0.98 FA/10k=0.000
149
+ Stiel -> Stil; 10000 # p=0.992 r=0.622 f0.5=0.886 101+961 2020-03-16 f0.1=0.99 FA/10k=0.001
150
+ Stil -> Stiel; 100 # p=1.000 r=0.149 f0.5=0.466 961+101 2020-03-16 f0.1=0.95 FA/10k=0.000
151
+ #strickt -> strikt; 10 # p=1.000 r=0.887 f0.5=0.975 8+203 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
152
+ #strikt -> strickt; 1000 # p=1.000 r=0.500 f0.5=0.833 203+8 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
153
+ Uhrzeit -> Urzeit; 10000 # p=1.000 r=0.515 f0.5=0.842 116+33 2020-03-16 f0.1=0.99 FA/10k=0.000
154
+ Urzeit -> Uhrzeit; 100 # p=1.000 r=0.422 f0.5=0.785 33+116 2020-03-16 f0.1=0.99 FA/10k=0.000
155
+ #und -> uns; 100 # p=0.998 r=0.693 f0.5=0.918 590+903 2020-03-16 f0.1=0.99 FA/10k=0.224 (commented due to estimated false alarms rate > 0.025)
156
+ #uns -> und; 10000000 # p=0.991 r=0.759 f0.5=0.934 903+590 2020-03-16 f0.1=0.99 FA/10k=1.007 (commented due to estimated false alarms rate > 0.025)
157
+ #vage -> wage; 10 # p=1.000 r=0.905 f0.5=0.979 76+21 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
158
+ #wage -> vage; 10 # p=1.000 r=0.908 f0.5=0.980 21+76 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
159
+ #Vasen -> Wasen; 100 # p=1.000 r=0.708 f0.5=0.924 39+24 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
160
+ #Wasen -> Vasen; 1000 # p=1.000 r=0.538 f0.5=0.854 24+39 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
161
+ verwaist -> verweist; 10000000 # p=0.993 r=0.458 f0.5=0.805 180+297 2020-03-16 f0.1=0.98 FA/10k=0.001
162
+ #Villen -> Willen; 1000000 # p=0.997 r=0.843 f0.5=0.961 142+699 2020-03-16 f0.1=1.00 FA/10k=0.001
163
+ Willen -> Villen; 100 # p=1.000 r=0.232 f0.5=0.602 699+142 2020-03-16 f0.1=0.97 FA/10k=0.000
164
+ #Waagen -> Wagen; 10000 # p=0.998 r=0.588 f0.5=0.876 19+857 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
165
+ #Waise -> Weise; 1000000 # p=0.999 r=0.792 f0.5=0.949 21+864 2020-03-16 f0.1=1.00 FA/10k=0.002 (commented due to sample size < 30)
166
+ ward -> wart; 100 # p=1.000 r=0.357 f0.5=0.735 52+42 2020-03-16 f0.1=0.98 FA/10k=0.000
167
+ #wart -> ward; 1000 # p=1.000 r=0.654 f0.5=0.904 42+52 2020-03-16 f0.1=0.99 FA/10k=0.000
168
+ #waren -> warnen; 1000 # p=1.000 r=0.518 f0.5=0.843 905+114 2020-03-16 f0.1=0.99 FA/10k=0.000
169
+ #warnen -> waren; 10000000 # p=0.991 r=0.701 f0.5=0.915 114+905 2020-03-16 f0.1=0.99 FA/10k=0.030 (commented due to estimated false alarms rate > 0.025)
170
+ weiht -> weit; 10000 # p=1.000 r=0.945 f0.5=0.989 42+928 2020-03-16 f0.1=1.00 FA/10k=0.000
171
+ #weit -> weiht; 10 # p=1.000 r=0.214 f0.5=0.577 928+42 2020-03-16 f0.1=0.96 FA/10k=0.000
172
+ weist -> weißt; 100 # p=0.996 r=0.929 f0.5=0.982 986+283 2020-03-16 f0.1=1.00 FA/10k=0.001
173
+ #weißt -> weist; 10 # p=0.994 r=0.909 f0.5=0.976 283+986 2020-03-16 f0.1=0.99 FA/10k=0.002
174
+ Welle -> Wälle; 100 # p=1.000 r=0.302 f0.5=0.684 764+43 2020-03-16 f0.1=0.98 FA/10k=0.000
175
+ Wälle -> Welle; 100000 # p=0.998 r=0.571 f0.5=0.868 43+764 2020-03-16 f0.1=0.99 FA/10k=0.000
176
+ wer -> wär; 100 # p=1.000 r=0.615 f0.5=0.889 171+52 2020-03-16 f0.1=0.99 FA/10k=0.000
177
+ #wär -> wer; 1000 # p=0.994 r=0.965 f0.5=0.988 52+171 2020-03-16 f0.1=0.99 FA/10k=0.002
178
+ wie -> wir; 10000000; # p=0.998, r=0.406, f0.5=0.773, s=0.999, 1913+1098, 3grams, 2021-11-24, fp=1, fn=652, tp=446, tn=1912, {wikipedia=2002}, {wikipedia=2002}
179
+ wir -> wie; 10000000; # p=0.987, r=0.446, f0.5=0.794, s=0.990, 1098+1913, 3grams, 2021-11-24, fp=11, fn=1060, tp=853, tn=1087, {wikipedia=2002}, {wikipedia=2002}
180
+ wir -> wird; 150000000; # p=0.984, r=0.252, f0.5=0.623, s=0.993, 1092+2000, 3grams, 2021-02-05, fp=8, fn=1496, tp=504, tn=1084, {wikipedia=2002}, {wikipedia=2002}
181
+ #wird -> wir; 1000 # p=0.995 r=0.852 f0.5=0.963 884+243 2020-03-16 f0.1=0.99 FA/10k=0.095 (commented due to estimated false alarms rate > 0.025)
182
+ #Würste -> Wüste; 10000 # p=0.996 r=0.571 f0.5=0.867 25+466 2020-03-16 f0.1=0.99 FA/10k=0.000 (commented due to sample size < 30)
183
+ #Zeilen -> Zielen; 100000 # p=0.990 r=0.375 f0.5=0.746 407+253 2020-03-16 f0.1=0.97 FA/10k=0.002
184
+ #Zielen -> Zeilen; 100000 # p=1.000 r=0.381 f0.5=0.755 253+407 2020-03-16 f0.1=0.98 FA/10k=0.000
185
+ #Zeile -> Ziele; 1000000 # p=0.998 r=0.540 f0.5=0.853 372+976 2020-03-16 f0.1=0.99 FA/10k=0.001
186
+ #Ziele -> Zeile; 1000 # p=1.000 r=0.565 f0.5=0.866 976+372 2020-03-16 f0.1=0.99 FA/10k=0.000
187
+ #zischen -> zwischen; 1000 # p=0.999 r=0.884 f0.5=0.973 3+766 2020-03-16 f0.1=1.00 FA/10k=0.005 (commented due to sample size < 30)
188
+ #Zunahme -> Zuname; 10 # p=1.000 r=0.200 f0.5=0.556 284+5 2020-03-16 f0.1=0.96 FA/10k=0.000 (commented due to sample size < 30)
189
+ #Zuname -> Zunahme; 10 # p=1.000 r=0.979 f0.5=0.996 5+284 2020-03-16 f0.1=1.00 FA/10k=0.000 (commented due to sample size < 30)
190
+
191
+ #aus -> auf; 1000 # p=0.997 r=0.433 f0.5=0.791 802+804 2020-03-16 f0.1=0.98 FA/10k=0.118 (commented due to estimated false alarms rate > 0.025)
192
+ #auf -> aus; 1000 # p=0.984 r=0.459 f0.5=0.801 804+802 2020-03-16 f0.1=0.97 FA/10k=0.630 (commented due to precision < 0.99)
193
+ #biss -> bis; 10000000 # p=0.994 r=0.801 f0.5=0.948 37+799 2020-03-16 f0.1=0.99 FA/10k=0.030 (commented due to estimated false alarms rate > 0.025)
194
+ #braten -> beraten; 10000000 # p=0.981 r=0.278 f0.5=0.652 17+187 2020-03-16 f0.1=0.96 FA/10k=0.001 (commented due to precision < 0.99)
195
+ #das -> dass; 10 # p=0.998 r=0.879 f0.5=0.971 538+975 2020-03-16 f0.1=1.00 FA/10k=0.073 (commented due to estimated false alarms rate > 0.025)
196
+ #dass -> das; 1000000 # p=0.991 r=0.810 f0.5=0.949 975+538 2020-03-16 f0.1=0.99 FA/10k=0.329 (commented due to estimated false alarms rate > 0.025)
197
+ #den -> denn; 10 # p=0.995 r=0.785 f0.5=0.945 910+834 2020-03-16 f0.1=0.99 FA/10k=0.238 (commented due to estimated false alarms rate > 0.025)
198
+ #denn -> den; 1000000 # p=0.993 r=0.882 f0.5=0.968 834+910 2020-03-16 f0.1=0.99 FA/10k=0.333 (commented due to estimated false alarms rate > 0.025)
199
+ #de -> die; 1000 # p=0.982 r=0.797 f0.5=0.938 565+547 2020-03-16 f0.1=0.98 FA/10k=2.323 (commented due to precision < 0.99)
200
+ #die -> de; 10 # p=1.000 r=0.873 f0.5=0.972 547+565 2020-03-16 f0.1=1.00 FA/10k=0.000
201
+ #die -> sie; 10 # p=0.996 r=0.759 f0.5=0.938 559+336 2020-03-16 f0.1=0.99 FA/10k=0.564 (commented due to estimated false alarms rate > 0.025)
202
+ #sie -> die; 100000 # p=1.000 r=0.606 f0.5=0.885 336+559 2020-03-16 f0.1=0.99 FA/10k=0.000
203
+ fasst -> fast; 10000000 # p=0.990 r=0.754 f0.5=0.932 311+813 2020-03-16 f0.1=0.99 FA/10k=0.011
204
+ fast -> fasst; 100 # p=1.000 r=0.341 f0.5=0.721 813+311 2020-03-16 f0.1=0.98 FA/10k=0.000
205
+ furchtbar -> fruchtbar; 10000 # p=1.000 r=0.431 f0.5=0.791 56+109 2020-03-16 f0.1=0.99 FA/10k=0.000
206
+ fruchtbar -> furchtbar; 10 # p=1.000 r=0.696 f0.5=0.920 109+56 2020-03-16 f0.1=1.00 FA/10k=0.000
207
+ Gesten -> Gestern; 10000 # p=0.996 r=0.909 f0.5=0.977 77+252 2020-03-16 f0.1=1.00 FA/10k=0.000
208
+ #Gestern -> Gesten; 10 # p=1.000 r=0.701 f0.5=0.922 252+77 2020-03-16 f0.1=1.00 FA/10k=0.000
209
+ gewaschen -> gewachsen; 1000000 # p=0.994 r=0.534 f0.5=0.848 102+311 2020-03-16 f0.1=0.99 FA/10k=0.001
210
+ gewachsen -> gewaschen; 10 # p=1.000 r=0.647 f0.5=0.902 311+102 2020-03-16 f0.1=0.99 FA/10k=0.000
211
+ Grat -> Grad; 1000000 # p=0.993 r=0.685 f0.5=0.911 51+855 2020-03-16 f0.1=0.99 FA/10k=0.001
212
+ Graf -> Graph; 100 # p=1.000 r=0.303 f0.5=0.685 927+178 2020-03-16 f0.1=0.98 FA/10k=0.000
213
+ #Graph -> Graf; 1000000 # p=0.999 r=0.728 f0.5=0.929 178+927 2020-03-16 f0.1=1.00 FA/10k=0.000
214
+ gründlich -> grünlich; 100 # p=1.000 r=0.264 f0.5=0.642 137+53 2020-03-16 f0.1=0.97 FA/10k=0.000
215
+ grünlich -> gründlich; 1000000 # p=1.000 r=0.387 f0.5=0.759 53+137 2020-03-16 f0.1=0.98 FA/10k=0.000
216
+ #hast -> hasst; 10 # p=1.000 r=0.396 f0.5=0.766 509+101 2020-03-16 f0.1=0.99 FA/10k=0.000
217
+ #hasst -> hast; 1000000 # p=0.985 r=0.880 f0.5=0.962 101+509 2020-03-16 f0.1=0.98 FA/10k=0.002 (commented due to precision < 0.99)
218
+ #leeren -> lehren; 100000 # p=1.000 r=0.388 f0.5=0.760 210+152 2020-03-16 f0.1=0.98 FA/10k=0.000
219
+ lehren -> leeren; 10 # p=1.000 r=0.900 f0.5=0.978 152+210 2020-03-16 f0.1=1.00 FA/10k=0.000
220
+ #ließ -> lies; 100 # p=1.000 r=0.381 f0.5=0.755 979+21 2020-03-16 f0.1=0.98 FA/10k=0.000 (commented due to sample size < 30)
221
+ #lies -> ließ; 10 # p=0.991 r=0.906 f0.5=0.973 21+979 2020-03-16 f0.1=0.99 FA/10k=0.005 (commented due to sample size < 30)
222
+ Moor -> Mohr; 1000 # p=1.000 r=0.487 f0.5=0.826 203+413 2020-03-16 f0.1=0.99 FA/10k=0.000
223
+ #Mohr -> Moor; 100 # p=0.963 r=0.384 f0.5=0.740 413+203 2020-03-16 f0.1=0.95 FA/10k=0.001 (commented due to precision < 0.99)
224
+ reist -> reißt; 10000 # p=1.000 r=0.256 f0.5=0.632 141+90 2020-03-16 f0.1=0.97 FA/10k=0.000
225
+ #reißt -> reist; 10 # p=0.986 r=0.496 f0.5=0.824 90+141 2020-03-16 f0.1=0.98 FA/10k=0.000 (commented due to precision < 0.99)
226
+ #Saite -> Seite; 10000000 # p=0.975 r=0.413 f0.5=0.766 130+927 2020-03-16 f0.1=0.96 FA/10k=0.033 (commented due to precision < 0.99)
227
+ #Seite -> Saite; 10 # p=1.000 r=0.131 f0.5=0.429 927+130 2020-03-16 f0.1=0.94 FA/10k=0.000
228
+ #seid -> seit; 10000000 # p=0.998 r=0.868 f0.5=0.969 336+947 2020-03-16 f0.1=1.00 FA/10k=0.003
229
+ #seit -> seid; 100 # p=1.000 r=0.932 f0.5=0.986 947+336 2020-03-16 f0.1=1.00 FA/10k=0.000
230
+ Städte -> Stätte; 100 # p=1.000 r=0.289 f0.5=0.670 859+97 2020-03-16 f0.1=0.98 FA/10k=0.000
231
+ Stätte -> Städte; 100000 # p=0.990 r=0.672 f0.5=0.904 97+859 2020-03-16 f0.1=0.99 FA/10k=0.003
232
+ unternehmen -> Unternehmen; 10 # p=1.000 r=0.879 f0.5=0.973 74+1000 2020-03-16 f0.1=1.00 FA/10k=0.000
233
+ Unternehmen -> unternehmen; 10000 # p=1.000 r=0.703 f0.5=0.922 1000+74 2020-03-16 f0.1=1.00 FA/10k=0.000
234
+ #wahr -> war; 10000000 # p=0.978 r=0.865 f0.5=0.953 964+961 2020-03-16 f0.1=0.98 FA/10k=0.209 (commented due to precision < 0.99)
235
+ #war -> wahr; 10 # p=0.998 r=0.669 f0.5=0.909 961+964 2020-03-16 f0.1=0.99 FA/10k=0.019
236
+ #wenn -> wen; 1000 # p=1.000 r=0.424 f0.5=0.786 541+125 2020-03-16 f0.1=0.99 FA/10k=0.000
237
+ #ei -> ein; 1000000 # p=0.992 r=0.772 f0.5=0.938 45+950 2020-03-16 f0.1=0.99 FA/10k=0.151 (commented due to estimated false alarms rate > 0.025)
238
+ #ein -> ei; 10 # p=1.000 r=0.133 f0.5=0.435 950+45 2020-03-16 f0.1=0.94 FA/10k=0.000
239
+ #werden -> werfen; 10 # p=1.000 r=0.233 f0.5=0.603 910+339 2020-03-16 f0.1=0.97 FA/10k=0.000
240
+ #werfen -> werden; 10000000 # p=0.955 r=0.659 f0.5=0.877 339+910 2020-03-16 f0.1=0.95 FA/10k=0.740 (commented due to precision < 0.99)
241
+ Rat -> Rad; 100 # p=1.000 r=0.304 f0.5=0.685 944+481 2020-03-16 f0.1=0.98 FA/10k=0.000
242
+ #Rad -> Rat; 100000 # p=0.996 r=0.713 f0.5=0.922 481+944 2020-03-16 f0.1=0.99 FA/10k=0.001
243
+ #Wal -> Wahl; 10000000 # p=0.996 r=0.760 f0.5=0.938 143+926 2020-03-16 f0.1=0.99 FA/10k=0.002
244
+ weit -> weint; 100000; # p=1.000, r=0.167, f0.5=0.500, 928+60, 3grams, 2020-05-25
245
+ #weint -> weit; 10000000; # p=0.999, r=0.858, f0.5=0.967, 60+928, 3grams, 2020-05-25
246
+ kotet -> kostet; 100000; # p=0.996, r=0.561, f0.5=0.862, s=0.286, 7+1997, 3grams, 2022-10-16, fp=5, fn=877, tp=1120, tn=2, {wikipedia=8}, {wikipedia=2002}