kotoshu 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (210) hide show
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +18 -0
  4. data/CHANGELOG.md +182 -0
  5. data/CLAUDE.md +172 -0
  6. data/CODE_OF_CONDUCT.md +132 -0
  7. data/LICENSE +31 -0
  8. data/README.adoc +955 -0
  9. data/Rakefile +12 -0
  10. data/SECURITY.md +93 -0
  11. data/examples/01_basic_word_checking.rb +38 -0
  12. data/examples/02_text_document_checking.rb +77 -0
  13. data/examples/03_dictionary_backends.rb +137 -0
  14. data/examples/04_trie_data_structure.rb +146 -0
  15. data/examples/05_suggestion_algorithms.rb +239 -0
  16. data/examples/06_configuration_advanced.rb +287 -0
  17. data/examples/07_multi_language_dictionaries.rb +278 -0
  18. data/exe/kotoshu +6 -0
  19. data/lib/kotoshu/algorithms/capitalization.rb +276 -0
  20. data/lib/kotoshu/algorithms/lookup.rb +876 -0
  21. data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
  22. data/lib/kotoshu/algorithms/permutations.rb +283 -0
  23. data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
  24. data/lib/kotoshu/algorithms/suggest.rb +575 -0
  25. data/lib/kotoshu/algorithms.rb +14 -0
  26. data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
  27. data/lib/kotoshu/cache/base_cache.rb +596 -0
  28. data/lib/kotoshu/cache/cache.rb +91 -0
  29. data/lib/kotoshu/cache/frequency_cache.rb +224 -0
  30. data/lib/kotoshu/cache/language_cache.rb +454 -0
  31. data/lib/kotoshu/cache/lookup_cache.rb +166 -0
  32. data/lib/kotoshu/cache/model_cache.rb +513 -0
  33. data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
  34. data/lib/kotoshu/cache.rb +40 -0
  35. data/lib/kotoshu/cli/auto_setup.rb +71 -0
  36. data/lib/kotoshu/cli/batch_reporter.rb +315 -0
  37. data/lib/kotoshu/cli/cache_command.rb +356 -0
  38. data/lib/kotoshu/cli/display_formatter.rb +431 -0
  39. data/lib/kotoshu/cli/errors.rb +36 -0
  40. data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
  41. data/lib/kotoshu/cli/language_resolver.rb +91 -0
  42. data/lib/kotoshu/cli/navigation_manager.rb +272 -0
  43. data/lib/kotoshu/cli/progress_reporter.rb +114 -0
  44. data/lib/kotoshu/cli/status_report.rb +130 -0
  45. data/lib/kotoshu/cli.rb +627 -0
  46. data/lib/kotoshu/commands/cache_command.rb +424 -0
  47. data/lib/kotoshu/commands/check_command.rb +312 -0
  48. data/lib/kotoshu/commands/model_command.rb +295 -0
  49. data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
  50. data/lib/kotoshu/components/pos_tagger.rb +98 -0
  51. data/lib/kotoshu/components/spell_checker.rb +73 -0
  52. data/lib/kotoshu/components/synthesizer.rb +60 -0
  53. data/lib/kotoshu/components/tokenizer.rb +58 -0
  54. data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
  55. data/lib/kotoshu/configuration/builder.rb +209 -0
  56. data/lib/kotoshu/configuration/resolver.rb +124 -0
  57. data/lib/kotoshu/configuration.rb +702 -0
  58. data/lib/kotoshu/core/exceptions.rb +165 -0
  59. data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
  60. data/lib/kotoshu/core/models/affix_rule.rb +260 -0
  61. data/lib/kotoshu/core/models/result/document_result.rb +263 -0
  62. data/lib/kotoshu/core/models/result/word_result.rb +203 -0
  63. data/lib/kotoshu/core/models/word.rb +142 -0
  64. data/lib/kotoshu/core/trie/builder.rb +119 -0
  65. data/lib/kotoshu/core/trie/node.rb +94 -0
  66. data/lib/kotoshu/core/trie/trie.rb +249 -0
  67. data/lib/kotoshu/core.rb +28 -0
  68. data/lib/kotoshu/data/common_words/de.yml +1800 -0
  69. data/lib/kotoshu/data/common_words/en.yml +1215 -0
  70. data/lib/kotoshu/data/common_words/es.yml +750 -0
  71. data/lib/kotoshu/data/common_words/fr.yml +1015 -0
  72. data/lib/kotoshu/data/common_words/pt.yml +870 -0
  73. data/lib/kotoshu/data/common_words/ru.yml +484 -0
  74. data/lib/kotoshu/data/common_words_loader.rb +152 -0
  75. data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
  76. data/lib/kotoshu/debug_logger.rb +146 -0
  77. data/lib/kotoshu/debug_mode.rb +134 -0
  78. data/lib/kotoshu/defaults.rb +86 -0
  79. data/lib/kotoshu/dictionaries/catalog.rb +817 -0
  80. data/lib/kotoshu/dictionary/base.rb +237 -0
  81. data/lib/kotoshu/dictionary/cspell.rb +254 -0
  82. data/lib/kotoshu/dictionary/custom.rb +224 -0
  83. data/lib/kotoshu/dictionary/hunspell.rb +526 -0
  84. data/lib/kotoshu/dictionary/plain_text.rb +282 -0
  85. data/lib/kotoshu/dictionary/repository.rb +248 -0
  86. data/lib/kotoshu/dictionary/unified.rb +260 -0
  87. data/lib/kotoshu/dictionary/unix_words.rb +218 -0
  88. data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
  89. data/lib/kotoshu/documents/document.rb +229 -0
  90. data/lib/kotoshu/documents/location.rb +139 -0
  91. data/lib/kotoshu/documents/markdown_document.rb +389 -0
  92. data/lib/kotoshu/documents/plain_text_document.rb +147 -0
  93. data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
  94. data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
  95. data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
  96. data/lib/kotoshu/embeddings/protocol.rb +83 -0
  97. data/lib/kotoshu/embeddings/protocols.rb +17 -0
  98. data/lib/kotoshu/embeddings/registry.rb +182 -0
  99. data/lib/kotoshu/embeddings/search.rb +192 -0
  100. data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
  101. data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
  102. data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
  103. data/lib/kotoshu/embeddings.rb +97 -0
  104. data/lib/kotoshu/fluent_checker.rb +91 -0
  105. data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
  106. data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
  107. data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
  108. data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
  109. data/lib/kotoshu/grammar/rule.rb +95 -0
  110. data/lib/kotoshu/grammar/rule_engine.rb +111 -0
  111. data/lib/kotoshu/grammar/rule_loader.rb +31 -0
  112. data/lib/kotoshu/grammar.rb +18 -0
  113. data/lib/kotoshu/integrity/audit_log.rb +88 -0
  114. data/lib/kotoshu/integrity/manifest.rb +117 -0
  115. data/lib/kotoshu/integrity/net_http.rb +46 -0
  116. data/lib/kotoshu/integrity.rb +25 -0
  117. data/lib/kotoshu/keyboard/layout.rb +115 -0
  118. data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
  119. data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
  120. data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
  121. data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
  122. data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
  123. data/lib/kotoshu/keyboard/registry.rb +146 -0
  124. data/lib/kotoshu/keyboard.rb +60 -0
  125. data/lib/kotoshu/language/detector.rb +242 -0
  126. data/lib/kotoshu/language/identifier.rb +378 -0
  127. data/lib/kotoshu/language/languages/base.rb +256 -0
  128. data/lib/kotoshu/language/normalizer/base.rb +137 -0
  129. data/lib/kotoshu/language/registry.rb +147 -0
  130. data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
  131. data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
  132. data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
  133. data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
  134. data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
  135. data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
  136. data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
  137. data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
  138. data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
  139. data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
  140. data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
  141. data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
  142. data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
  143. data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
  144. data/lib/kotoshu/language/tokenizer/base.rb +170 -0
  145. data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
  146. data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
  147. data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
  148. data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
  149. data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
  150. data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
  151. data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
  152. data/lib/kotoshu/language.rb +99 -0
  153. data/lib/kotoshu/languages/de/language.rb +546 -0
  154. data/lib/kotoshu/languages/en/language.rb +448 -0
  155. data/lib/kotoshu/languages/es/language.rb +459 -0
  156. data/lib/kotoshu/languages/fr/language.rb +493 -0
  157. data/lib/kotoshu/languages/ja/language.rb +477 -0
  158. data/lib/kotoshu/languages/pt/language.rb +423 -0
  159. data/lib/kotoshu/languages/ru/language.rb +404 -0
  160. data/lib/kotoshu/languages.rb +43 -0
  161. data/lib/kotoshu/metrics_collector.rb +222 -0
  162. data/lib/kotoshu/metrics_module.rb +110 -0
  163. data/lib/kotoshu/models/context.rb +119 -0
  164. data/lib/kotoshu/models/embedding_model.rb +182 -0
  165. data/lib/kotoshu/models/fasttext_model.rb +220 -0
  166. data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
  167. data/lib/kotoshu/models/onnx_model.rb +333 -0
  168. data/lib/kotoshu/models/semantic_error.rb +165 -0
  169. data/lib/kotoshu/models/suggestion.rb +106 -0
  170. data/lib/kotoshu/models/word_embedding.rb +107 -0
  171. data/lib/kotoshu/paths.rb +53 -0
  172. data/lib/kotoshu/personal_dictionary.rb +94 -0
  173. data/lib/kotoshu/plugins/plugin.rb +61 -0
  174. data/lib/kotoshu/plugins/registry.rb +120 -0
  175. data/lib/kotoshu/project_config.rb +76 -0
  176. data/lib/kotoshu/readers/aff_data.rb +356 -0
  177. data/lib/kotoshu/readers/aff_reader.rb +375 -0
  178. data/lib/kotoshu/readers/condition_checker.rb +142 -0
  179. data/lib/kotoshu/readers/dic_reader.rb +118 -0
  180. data/lib/kotoshu/readers/file_reader.rb +347 -0
  181. data/lib/kotoshu/readers/lookup_builder.rb +299 -0
  182. data/lib/kotoshu/readers/readers.rb +6 -0
  183. data/lib/kotoshu/readers.rb +9 -0
  184. data/lib/kotoshu/resource_bundle.rb +30 -0
  185. data/lib/kotoshu/resource_manager.rb +295 -0
  186. data/lib/kotoshu/results/result.rb +165 -0
  187. data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
  188. data/lib/kotoshu/source_registry.rb +74 -0
  189. data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
  190. data/lib/kotoshu/spellchecker.rb +298 -0
  191. data/lib/kotoshu/string_metrics.rb +153 -0
  192. data/lib/kotoshu/suggestions/context.rb +55 -0
  193. data/lib/kotoshu/suggestions/generator.rb +175 -0
  194. data/lib/kotoshu/suggestions/pipeline.rb +135 -0
  195. data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
  196. data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
  197. data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
  198. data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
  199. data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
  200. data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
  201. data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
  202. data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
  203. data/lib/kotoshu/suggestions/suggestion.rb +174 -0
  204. data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
  205. data/lib/kotoshu/version.rb +5 -0
  206. data/lib/kotoshu.rb +493 -0
  207. data/script/validate_all_dictionaries.rb +444 -0
  208. data/sig/kotoshu.rbs +4 -0
  209. data/test_oop.rb +79 -0
  210. metadata +298 -0
@@ -0,0 +1,76 @@
1
+ # See the English 'confusion_sets.txt' for a description of the file format
2
+
3
+ bon -> bond; 100 # p=1.000 r=0.190 f0.5=0.541 347+21 2020-03-17 f0.1=0.96 FA/10k=0.000
4
+ bond -> bon; 10000 # p=1.000 r=0.790 f0.5=0.949 21+347 2020-03-17 f0.1=1.00 FA/10k=0.000
5
+ #ce -> se; 100 # p=0.997 r=0.933 f0.5=0.984 337+690 2020-03-17 f0.1=1.00 FA/10k=0.179 (commented due to estimated false alarms rate > 0.025)
6
+ #se -> ce; 100 # p=1.000 r=0.760 f0.5=0.940 690+337 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
7
+ #cent -> sans; 10000 # p=0.997 r=0.800 f0.5=0.951 242+466 2020-03-17 f0.1=0.99 FA/10k=0.031 (commented due to estimated false alarms rate > 0.025)
8
+ #sans -> cent; 1000000 # p=1.000 r=0.463 f0.5=0.812 466+242 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
9
+ #cette -> sept; 1000 # p=1.000 r=0.624 f0.5=0.892 360+404 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
10
+ #sept -> cette; 1000000 # p=1.000 r=0.908 f0.5=0.980 404+360 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
11
+ dans -> dent; 10000 # p=1.000 r=0.656 f0.5=0.905 217+352 2020-03-17 f0.1=0.99 FA/10k=0.000
12
+ #dent -> dans; 10000000 # p=0.986 r=0.641 f0.5=0.890 352+217 2020-03-17 f0.1=0.98 FA/10k=0.837 (commented due to precision < 0.99)
13
+ don -> donc; 1000 # p=1.000 r=0.763 f0.5=0.941 60+636 2020-03-17 f0.1=1.00 FA/10k=0.000
14
+ donc -> don; 1000 # p=1.000 r=0.650 f0.5=0.903 636+60 2020-03-17 f0.1=0.99 FA/10k=0.000
15
+ #donc -> dont; 1000 # p=0.962 r=0.260 f0.5=0.625 636+780 2020-03-17 f0.1=0.94 FA/10k=0.572 (commented due to precision < 0.99)
16
+ #dont -> donc; 10000000 # p=0.985 r=0.506 f0.5=0.828 780+636 2020-03-17 f0.1=0.98 FA/10k=0.226 (commented due to precision < 0.99)
17
+ #il -> ils; 10 # p=1.000 r=0.581 f0.5=0.874 69+136 2020-03-17 f0.1=0.99 FA/10k=0.000
18
+ #ils -> il; 100000 # p=1.000 r=0.406 f0.5=0.773 136+69 2020-03-17 f0.1=0.99 FA/10k=0.000
19
+ #mais -> mai; 10000 # p=1.000 r=0.762 f0.5=0.941 336+404 2020-03-17 f0.1=1.00 FA/10k=0.000
20
+ moi -> mois; 1000 # p=1.000 r=0.689 f0.5=0.917 513+592 2020-03-17 f0.1=1.00 FA/10k=0.000
21
+ mois -> moi; 10000 # p=0.996 r=0.493 f0.5=0.827 592+513 2020-03-17 f0.1=0.99 FA/10k=0.017
22
+ #nom -> non; 100000 # p=0.965 r=0.407 f0.5=0.757 305+273 2020-03-17 f0.1=0.95 FA/10k=0.323 (commented due to precision < 0.99)
23
+ notre -> nôtre; 10000000 # p=1.000 r=0.767 f0.5=0.943 184+519 2020-03-17 f0.1=1.00 FA/10k=0.000
24
+ #nôtre -> notre; 1000000 # p=0.967 r=0.788 f0.5=0.925 519+184 2020-03-17 f0.1=0.96 FA/10k=0.153 (commented due to precision < 0.99)
25
+ pain -> pin; 10 # p=1.000 r=0.276 f0.5=0.656 407+228 2020-03-17 f0.1=0.97 FA/10k=0.000
26
+ #pin -> pain; 100000 # p=0.986 r=0.351 f0.5=0.724 228+407 2020-03-17 f0.1=0.97 FA/10k=0.004 (commented due to precision < 0.99)
27
+ #paire -> père; 10000000 # p=0.976 r=0.653 f0.5=0.888 741+570 2020-03-17 f0.1=0.97 FA/10k=0.033 (commented due to precision < 0.99)
28
+ père -> paire; 100 # p=1.000 r=0.796 f0.5=0.951 570+741 2020-03-17 f0.1=1.00 FA/10k=0.000
29
+ peau -> pot; 10 # p=0.996 r=0.658 f0.5=0.903 486+392 2020-03-17 f0.1=0.99 FA/10k=0.001
30
+ pot -> peau; 100000 # p=1.000 r=0.831 f0.5=0.961 392+486 2020-03-17 f0.1=1.00 FA/10k=0.000
31
+ #pris -> prix; 10000 # p=0.959 r=0.338 f0.5=0.701 350+139 2020-03-17 f0.1=0.94 FA/10k=0.152 (commented due to precision < 0.99)
32
+ prix -> pris; 10 # p=1.000 r=0.880 f0.5=0.973 139+350 2020-03-17 f0.1=1.00 FA/10k=0.000
33
+ #quand -> quant; 10 # p=1.000 r=0.937 f0.5=0.987 303+474 2020-03-17 f0.1=1.00 FA/10k=0.000 commented out due to false positive
34
+ #quant -> quand; 100000 # p=0.988 r=0.785 f0.5=0.939 474+303 2020-03-17 f0.1=0.99 FA/10k=0.038 (commented due to precision < 0.99)
35
+ #sais -> sait; 10 # p=0.998 r=0.773 f0.5=0.943 359+797 2020-03-17 f0.1=1.00 FA/10k=0.003 commented due to false positives
36
+ #sait -> sais; 1000 # p=1.000 r=0.471 f0.5=0.816 797+359 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
37
+ tante -> tente; 1000 # p=0.997 r=0.758 f0.5=0.938 352+890 2020-03-17 f0.1=0.99 FA/10k=0.001
38
+ tente -> tante; 1000 # p=1.000 r=0.210 f0.5=0.571 890+352 2020-03-17 f0.1=0.96 FA/10k=0.000
39
+ toi -> toit; 10 # p=1.000 r=0.826 f0.5=0.959 497+728 2020-03-17 f0.1=1.00 FA/10k=0.000
40
+ #toit -> toi; 1000000 # p=0.985 r=0.400 f0.5=0.762 728+497 2020-03-17 f0.1=0.97 FA/10k=0.008 (commented due to precision < 0.99)
41
+ #trait -> très; 100000 # p=0.992 r=0.851 f0.5=0.960 567+596 2020-03-17 f0.1=0.99 FA/10k=0.061 (commented due to estimated false alarms rate > 0.025)
42
+ très -> trait; 10000 # p=1.000 r=0.519 f0.5=0.843 596+567 2020-03-17 f0.1=0.99 FA/10k=0.000
43
+ vain -> vin; 1000 # p=0.990 r=0.785 f0.5=0.941 810+506 2020-03-17 f0.1=0.99 FA/10k=0.005
44
+ vin -> vain; 100 # p=0.999 r=0.951 f0.5=0.989 506+810 2020-03-17 f0.1=1.00 FA/10k=0.001
45
+ vain -> vingt; 100000 # p=1.000 r=0.429 f0.5=0.790 810+445 2020-03-17 f0.1=0.99 FA/10k=0.000
46
+ vingt -> vain; 10 # p=1.000 r=0.969 f0.5=0.994 445+810 2020-03-17 f0.1=1.00 FA/10k=0.000
47
+ vin -> vingt; 10000 # p=0.995 r=0.432 f0.5=0.789 503+444 2020-03-17 f0.1=0.98 FA/10k=0.006
48
+ vingt -> vin; 100 # p=0.995 r=0.805 f0.5=0.950 444+503 2020-03-17 f0.1=0.99 FA/10k=0.006
49
+ #ver -> verre; 10000 # p=0.968 r=0.602 f0.5=0.863 439+708 2020-03-17 f0.1=0.96 FA/10k=0.009 (commented due to precision < 0.99)
50
+ verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000
51
+ #ver -> vers; 1000000 # p=0.979 r=0.694 f0.5=0.905 438+481 2020-03-17 f0.1=0.98 FA/10k=0.080 (commented due to precision < 0.99)
52
+ #ver -> vert; 1000 # p=0.971 r=0.556 f0.5=0.845 438+666 2020-03-17 f0.1=0.96 FA/10k=0.007 (commented due to precision < 0.99)
53
+ #vert -> verre; 100 # p=0.991 r=0.745 f0.5=0.929 664+707 2020-03-17 f0.1=0.99 FA/10k=0.004
54
+ vert -> verre; 10000; # p=0.993, r=0.420, f0.5=0.780, 664+707, 3grams, 2020-03-17
55
+ verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000
56
+ #vers -> verre; 100 # p=1.000 r=0.677 f0.5=0.913 474+706 2020-03-17 f0.1=1.00 FA/10k=0.000
57
+ vers -> verre; 10000; # p=1.000, r=0.380, f0.5=0.754, 474+706, 3grams, 2020-03-17
58
+ #vers -> vert; 100 # p=1.000 r=0.306 f0.5=0.688 474+661 2020-03-17 f0.1=0.98 FA/10k=0.000
59
+ #vert -> vers; 10000000 # p=0.991 r=0.667 f0.5=0.903 661+474 2020-03-17 f0.1=0.99 FA/10k=0.036 (commented due to estimated false alarms rate > 0.025)
60
+ verre -> vert; 100 # p=0.990 r=0.574 f0.5=0.864 707+664 2020-03-17 f0.1=0.98 FA/10k=0.004
61
+ #votre -> vôtre; 10 # p=0.992 r=0.939 f0.5=0.981 325+872 2020-03-17 f0.1=0.99 FA/10k=0.010 commented due to false positives
62
+
63
+ # Commented out; precision may still not be high enough for these:
64
+ #an; en; 1000000 # p=0.997, r=0.803, 1000+529, 3grams, 2016-03-29
65
+ #à; a; 10000000 # p=0.999, r=0.768, 1000+1000, 3grams, 2016-03-29
66
+ #ces; ses; 100000 # p=0.986, r=0.218, 909+990, 3grams, 2016-03-29
67
+ #cor; corps; 10000000 # p=0.997, r=0.790, 21+437, 3grams, 2016-03-30
68
+ #cours; court; 10000000 # p=0.995, r=0.630, 866+301, 3grams, 2016-03-30
69
+ #maire; mer; 10000000 # p=0.998, r=0.692, 212+528, 3grams, 2016-03-30
70
+ #maire; mère; 10000000 # p=0.998, r=0.694, 212+994, 3grams, 2016-03-30
71
+ #mer; mère; 10000000 # p=0.999, r=0.518, 528+992, 3grams, 2016-03-30
72
+ #par; part; 10000000 # p=0.997, r=0.798, 978+843, 3grams, 2016-03-30
73
+ #parti; partie; 10000000 # p=0.999, r=0.694, 995+983, 3grams, 2016-03-30
74
+ #saint;sen; 10000000 # p=0.998, r=0.333, 997+249, 3grams, 2016-03-30
75
+ #sur; sûr; 10000000 # p=0.999, r=0.741, 999+469, 3grams, 2016-03-30
76
+ #tant; temps; 10000000 # p=0.999, r=0.587, 587+1000, 3grams, 2016-03-30