kotoshu 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/.rubocop.yml +18 -0
- data/CHANGELOG.md +182 -0
- data/CLAUDE.md +172 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE +31 -0
- data/README.adoc +955 -0
- data/Rakefile +12 -0
- data/SECURITY.md +93 -0
- data/examples/01_basic_word_checking.rb +38 -0
- data/examples/02_text_document_checking.rb +77 -0
- data/examples/03_dictionary_backends.rb +137 -0
- data/examples/04_trie_data_structure.rb +146 -0
- data/examples/05_suggestion_algorithms.rb +239 -0
- data/examples/06_configuration_advanced.rb +287 -0
- data/examples/07_multi_language_dictionaries.rb +278 -0
- data/exe/kotoshu +6 -0
- data/lib/kotoshu/algorithms/capitalization.rb +276 -0
- data/lib/kotoshu/algorithms/lookup.rb +876 -0
- data/lib/kotoshu/algorithms/ngram_suggest.rb +270 -0
- data/lib/kotoshu/algorithms/permutations.rb +283 -0
- data/lib/kotoshu/algorithms/phonet_suggest.rb +167 -0
- data/lib/kotoshu/algorithms/suggest.rb +575 -0
- data/lib/kotoshu/algorithms.rb +14 -0
- data/lib/kotoshu/analyzers/semantic_analyzer.rb +295 -0
- data/lib/kotoshu/cache/base_cache.rb +596 -0
- data/lib/kotoshu/cache/cache.rb +91 -0
- data/lib/kotoshu/cache/frequency_cache.rb +224 -0
- data/lib/kotoshu/cache/language_cache.rb +454 -0
- data/lib/kotoshu/cache/lookup_cache.rb +166 -0
- data/lib/kotoshu/cache/model_cache.rb +513 -0
- data/lib/kotoshu/cache/suggestion_cache.rb +113 -0
- data/lib/kotoshu/cache.rb +40 -0
- data/lib/kotoshu/cli/auto_setup.rb +71 -0
- data/lib/kotoshu/cli/batch_reporter.rb +315 -0
- data/lib/kotoshu/cli/cache_command.rb +356 -0
- data/lib/kotoshu/cli/display_formatter.rb +431 -0
- data/lib/kotoshu/cli/errors.rb +36 -0
- data/lib/kotoshu/cli/interactive_reviewer.rb +319 -0
- data/lib/kotoshu/cli/language_resolver.rb +91 -0
- data/lib/kotoshu/cli/navigation_manager.rb +272 -0
- data/lib/kotoshu/cli/progress_reporter.rb +114 -0
- data/lib/kotoshu/cli/status_report.rb +130 -0
- data/lib/kotoshu/cli.rb +627 -0
- data/lib/kotoshu/commands/cache_command.rb +424 -0
- data/lib/kotoshu/commands/check_command.rb +312 -0
- data/lib/kotoshu/commands/model_command.rb +295 -0
- data/lib/kotoshu/components/passthrough_spell_checker.rb +72 -0
- data/lib/kotoshu/components/pos_tagger.rb +98 -0
- data/lib/kotoshu/components/spell_checker.rb +73 -0
- data/lib/kotoshu/components/synthesizer.rb +60 -0
- data/lib/kotoshu/components/tokenizer.rb +58 -0
- data/lib/kotoshu/components/whitespace_tokenizer.rb +96 -0
- data/lib/kotoshu/configuration/builder.rb +209 -0
- data/lib/kotoshu/configuration/resolver.rb +124 -0
- data/lib/kotoshu/configuration.rb +702 -0
- data/lib/kotoshu/core/exceptions.rb +165 -0
- data/lib/kotoshu/core/indexed_dictionary.rb +291 -0
- data/lib/kotoshu/core/models/affix_rule.rb +260 -0
- data/lib/kotoshu/core/models/result/document_result.rb +263 -0
- data/lib/kotoshu/core/models/result/word_result.rb +203 -0
- data/lib/kotoshu/core/models/word.rb +142 -0
- data/lib/kotoshu/core/trie/builder.rb +119 -0
- data/lib/kotoshu/core/trie/node.rb +94 -0
- data/lib/kotoshu/core/trie/trie.rb +249 -0
- data/lib/kotoshu/core.rb +28 -0
- data/lib/kotoshu/data/common_words/de.yml +1800 -0
- data/lib/kotoshu/data/common_words/en.yml +1215 -0
- data/lib/kotoshu/data/common_words/es.yml +750 -0
- data/lib/kotoshu/data/common_words/fr.yml +1015 -0
- data/lib/kotoshu/data/common_words/pt.yml +870 -0
- data/lib/kotoshu/data/common_words/ru.yml +484 -0
- data/lib/kotoshu/data/common_words_loader.rb +152 -0
- data/lib/kotoshu/data_structures/bloom_filter.rb +176 -0
- data/lib/kotoshu/debug_logger.rb +146 -0
- data/lib/kotoshu/debug_mode.rb +134 -0
- data/lib/kotoshu/defaults.rb +86 -0
- data/lib/kotoshu/dictionaries/catalog.rb +817 -0
- data/lib/kotoshu/dictionary/base.rb +237 -0
- data/lib/kotoshu/dictionary/cspell.rb +254 -0
- data/lib/kotoshu/dictionary/custom.rb +224 -0
- data/lib/kotoshu/dictionary/hunspell.rb +526 -0
- data/lib/kotoshu/dictionary/plain_text.rb +282 -0
- data/lib/kotoshu/dictionary/repository.rb +248 -0
- data/lib/kotoshu/dictionary/unified.rb +260 -0
- data/lib/kotoshu/dictionary/unix_words.rb +218 -0
- data/lib/kotoshu/documents/asciidoc_document.rb +441 -0
- data/lib/kotoshu/documents/document.rb +229 -0
- data/lib/kotoshu/documents/location.rb +139 -0
- data/lib/kotoshu/documents/markdown_document.rb +389 -0
- data/lib/kotoshu/documents/plain_text_document.rb +147 -0
- data/lib/kotoshu/embeddings/embedding_pipeline.rb +244 -0
- data/lib/kotoshu/embeddings/lru_cache.rb +233 -0
- data/lib/kotoshu/embeddings/onnx_runtime_model.rb +388 -0
- data/lib/kotoshu/embeddings/protocol.rb +83 -0
- data/lib/kotoshu/embeddings/protocols.rb +17 -0
- data/lib/kotoshu/embeddings/registry.rb +182 -0
- data/lib/kotoshu/embeddings/search.rb +192 -0
- data/lib/kotoshu/embeddings/similarity_engine.rb +248 -0
- data/lib/kotoshu/embeddings/similarity_search.rb +331 -0
- data/lib/kotoshu/embeddings/vocabulary.rb +257 -0
- data/lib/kotoshu/embeddings.rb +97 -0
- data/lib/kotoshu/fluent_checker.rb +91 -0
- data/lib/kotoshu/grammar/pattern_matchers/base_matcher.rb +48 -0
- data/lib/kotoshu/grammar/pattern_matchers/double_negative_matcher.rb +105 -0
- data/lib/kotoshu/grammar/pattern_matchers/possessive_context_matcher.rb +77 -0
- data/lib/kotoshu/grammar/pattern_matchers/vowel_sound_matcher.rb +83 -0
- data/lib/kotoshu/grammar/rule.rb +95 -0
- data/lib/kotoshu/grammar/rule_engine.rb +111 -0
- data/lib/kotoshu/grammar/rule_loader.rb +31 -0
- data/lib/kotoshu/grammar.rb +18 -0
- data/lib/kotoshu/integrity/audit_log.rb +88 -0
- data/lib/kotoshu/integrity/manifest.rb +117 -0
- data/lib/kotoshu/integrity/net_http.rb +46 -0
- data/lib/kotoshu/integrity.rb +25 -0
- data/lib/kotoshu/keyboard/layout.rb +115 -0
- data/lib/kotoshu/keyboard/layouts/azerty.rb +57 -0
- data/lib/kotoshu/keyboard/layouts/dvorak.rb +56 -0
- data/lib/kotoshu/keyboard/layouts/jcuken.rb +59 -0
- data/lib/kotoshu/keyboard/layouts/qwerty.rb +54 -0
- data/lib/kotoshu/keyboard/layouts/qwertz.rb +57 -0
- data/lib/kotoshu/keyboard/registry.rb +146 -0
- data/lib/kotoshu/keyboard.rb +60 -0
- data/lib/kotoshu/language/detector.rb +242 -0
- data/lib/kotoshu/language/identifier.rb +378 -0
- data/lib/kotoshu/language/languages/base.rb +256 -0
- data/lib/kotoshu/language/normalizer/base.rb +137 -0
- data/lib/kotoshu/language/registry.rb +147 -0
- data/lib/kotoshu/language/resources/ar/common_words.txt +6753 -0
- data/lib/kotoshu/language/resources/ar/confusion_sets.txt +11 -0
- data/lib/kotoshu/language/resources/de/common_words.txt +10003 -0
- data/lib/kotoshu/language/resources/de/confusion_sets.txt +246 -0
- data/lib/kotoshu/language/resources/en/common_words.txt +9979 -0
- data/lib/kotoshu/language/resources/en/confusion_sets.txt +871 -0
- data/lib/kotoshu/language/resources/es/common_words.txt +9992 -0
- data/lib/kotoshu/language/resources/es/confusion_sets.txt +17 -0
- data/lib/kotoshu/language/resources/fr/common_words.txt +9993 -0
- data/lib/kotoshu/language/resources/fr/confusion_sets.txt +76 -0
- data/lib/kotoshu/language/resources/pt/common_words.txt +9977 -0
- data/lib/kotoshu/language/resources/pt/confusion_sets.txt +18 -0
- data/lib/kotoshu/language/resources/ru/common_words.txt +9951 -0
- data/lib/kotoshu/language/resources/ru/confusion_sets.txt +5 -0
- data/lib/kotoshu/language/tokenizer/base.rb +170 -0
- data/lib/kotoshu/language/tokenizer/french_tokenizer.rb +170 -0
- data/lib/kotoshu/language/tokenizer/german_tokenizer.rb +41 -0
- data/lib/kotoshu/language/tokenizer/japanese_tokenizer.rb +60 -0
- data/lib/kotoshu/language/tokenizer/latin_tokenizer.rb +141 -0
- data/lib/kotoshu/language/tokenizer/portuguese_tokenizer.rb +160 -0
- data/lib/kotoshu/language/tokenizer/russian_tokenizer.rb +95 -0
- data/lib/kotoshu/language/tokenizer/spanish_tokenizer.rb +122 -0
- data/lib/kotoshu/language.rb +99 -0
- data/lib/kotoshu/languages/de/language.rb +546 -0
- data/lib/kotoshu/languages/en/language.rb +448 -0
- data/lib/kotoshu/languages/es/language.rb +459 -0
- data/lib/kotoshu/languages/fr/language.rb +493 -0
- data/lib/kotoshu/languages/ja/language.rb +477 -0
- data/lib/kotoshu/languages/pt/language.rb +423 -0
- data/lib/kotoshu/languages/ru/language.rb +404 -0
- data/lib/kotoshu/languages.rb +43 -0
- data/lib/kotoshu/metrics_collector.rb +222 -0
- data/lib/kotoshu/metrics_module.rb +110 -0
- data/lib/kotoshu/models/context.rb +119 -0
- data/lib/kotoshu/models/embedding_model.rb +182 -0
- data/lib/kotoshu/models/fasttext_model.rb +220 -0
- data/lib/kotoshu/models/nearest_neighbor.rb +87 -0
- data/lib/kotoshu/models/onnx_model.rb +333 -0
- data/lib/kotoshu/models/semantic_error.rb +165 -0
- data/lib/kotoshu/models/suggestion.rb +106 -0
- data/lib/kotoshu/models/word_embedding.rb +107 -0
- data/lib/kotoshu/paths.rb +53 -0
- data/lib/kotoshu/personal_dictionary.rb +94 -0
- data/lib/kotoshu/plugins/plugin.rb +61 -0
- data/lib/kotoshu/plugins/registry.rb +120 -0
- data/lib/kotoshu/project_config.rb +76 -0
- data/lib/kotoshu/readers/aff_data.rb +356 -0
- data/lib/kotoshu/readers/aff_reader.rb +375 -0
- data/lib/kotoshu/readers/condition_checker.rb +142 -0
- data/lib/kotoshu/readers/dic_reader.rb +118 -0
- data/lib/kotoshu/readers/file_reader.rb +347 -0
- data/lib/kotoshu/readers/lookup_builder.rb +299 -0
- data/lib/kotoshu/readers/readers.rb +6 -0
- data/lib/kotoshu/readers.rb +9 -0
- data/lib/kotoshu/resource_bundle.rb +30 -0
- data/lib/kotoshu/resource_manager.rb +295 -0
- data/lib/kotoshu/results/result.rb +165 -0
- data/lib/kotoshu/scripts/fasttext_to_onnx.py +275 -0
- data/lib/kotoshu/source_registry.rb +74 -0
- data/lib/kotoshu/spellchecker/parallel_checker.rb +90 -0
- data/lib/kotoshu/spellchecker.rb +298 -0
- data/lib/kotoshu/string_metrics.rb +153 -0
- data/lib/kotoshu/suggestions/context.rb +55 -0
- data/lib/kotoshu/suggestions/generator.rb +175 -0
- data/lib/kotoshu/suggestions/pipeline.rb +135 -0
- data/lib/kotoshu/suggestions/strategies/base_strategy.rb +296 -0
- data/lib/kotoshu/suggestions/strategies/composite_strategy.rb +140 -0
- data/lib/kotoshu/suggestions/strategies/edit_distance_strategy.rb +671 -0
- data/lib/kotoshu/suggestions/strategies/keyboard_proximity_strategy.rb +228 -0
- data/lib/kotoshu/suggestions/strategies/ngram_strategy.rb +130 -0
- data/lib/kotoshu/suggestions/strategies/phonetic_strategy.rb +329 -0
- data/lib/kotoshu/suggestions/strategies/semantic_strategy.rb +316 -0
- data/lib/kotoshu/suggestions/strategies/symspell_strategy.rb +275 -0
- data/lib/kotoshu/suggestions/suggestion.rb +174 -0
- data/lib/kotoshu/suggestions/suggestion_set.rb +238 -0
- data/lib/kotoshu/version.rb +5 -0
- data/lib/kotoshu.rb +493 -0
- data/script/validate_all_dictionaries.rb +444 -0
- data/sig/kotoshu.rbs +4 -0
- data/test_oop.rb +79 -0
- metadata +298 -0
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# See the English 'confusion_sets.txt' for a description of the file format
|
|
2
|
+
|
|
3
|
+
bon -> bond; 100 # p=1.000 r=0.190 f0.5=0.541 347+21 2020-03-17 f0.1=0.96 FA/10k=0.000
|
|
4
|
+
bond -> bon; 10000 # p=1.000 r=0.790 f0.5=0.949 21+347 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
5
|
+
#ce -> se; 100 # p=0.997 r=0.933 f0.5=0.984 337+690 2020-03-17 f0.1=1.00 FA/10k=0.179 (commented due to estimated false alarms rate > 0.025)
|
|
6
|
+
#se -> ce; 100 # p=1.000 r=0.760 f0.5=0.940 690+337 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
|
|
7
|
+
#cent -> sans; 10000 # p=0.997 r=0.800 f0.5=0.951 242+466 2020-03-17 f0.1=0.99 FA/10k=0.031 (commented due to estimated false alarms rate > 0.025)
|
|
8
|
+
#sans -> cent; 1000000 # p=1.000 r=0.463 f0.5=0.812 466+242 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
|
|
9
|
+
#cette -> sept; 1000 # p=1.000 r=0.624 f0.5=0.892 360+404 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
|
|
10
|
+
#sept -> cette; 1000000 # p=1.000 r=0.908 f0.5=0.980 404+360 2020-03-17 f0.1=1.00 FA/10k=0.000 commented due to false positives
|
|
11
|
+
dans -> dent; 10000 # p=1.000 r=0.656 f0.5=0.905 217+352 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
12
|
+
#dent -> dans; 10000000 # p=0.986 r=0.641 f0.5=0.890 352+217 2020-03-17 f0.1=0.98 FA/10k=0.837 (commented due to precision < 0.99)
|
|
13
|
+
don -> donc; 1000 # p=1.000 r=0.763 f0.5=0.941 60+636 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
14
|
+
donc -> don; 1000 # p=1.000 r=0.650 f0.5=0.903 636+60 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
15
|
+
#donc -> dont; 1000 # p=0.962 r=0.260 f0.5=0.625 636+780 2020-03-17 f0.1=0.94 FA/10k=0.572 (commented due to precision < 0.99)
|
|
16
|
+
#dont -> donc; 10000000 # p=0.985 r=0.506 f0.5=0.828 780+636 2020-03-17 f0.1=0.98 FA/10k=0.226 (commented due to precision < 0.99)
|
|
17
|
+
#il -> ils; 10 # p=1.000 r=0.581 f0.5=0.874 69+136 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
18
|
+
#ils -> il; 100000 # p=1.000 r=0.406 f0.5=0.773 136+69 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
19
|
+
#mais -> mai; 10000 # p=1.000 r=0.762 f0.5=0.941 336+404 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
20
|
+
moi -> mois; 1000 # p=1.000 r=0.689 f0.5=0.917 513+592 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
21
|
+
mois -> moi; 10000 # p=0.996 r=0.493 f0.5=0.827 592+513 2020-03-17 f0.1=0.99 FA/10k=0.017
|
|
22
|
+
#nom -> non; 100000 # p=0.965 r=0.407 f0.5=0.757 305+273 2020-03-17 f0.1=0.95 FA/10k=0.323 (commented due to precision < 0.99)
|
|
23
|
+
notre -> nôtre; 10000000 # p=1.000 r=0.767 f0.5=0.943 184+519 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
24
|
+
#nôtre -> notre; 1000000 # p=0.967 r=0.788 f0.5=0.925 519+184 2020-03-17 f0.1=0.96 FA/10k=0.153 (commented due to precision < 0.99)
|
|
25
|
+
pain -> pin; 10 # p=1.000 r=0.276 f0.5=0.656 407+228 2020-03-17 f0.1=0.97 FA/10k=0.000
|
|
26
|
+
#pin -> pain; 100000 # p=0.986 r=0.351 f0.5=0.724 228+407 2020-03-17 f0.1=0.97 FA/10k=0.004 (commented due to precision < 0.99)
|
|
27
|
+
#paire -> père; 10000000 # p=0.976 r=0.653 f0.5=0.888 741+570 2020-03-17 f0.1=0.97 FA/10k=0.033 (commented due to precision < 0.99)
|
|
28
|
+
père -> paire; 100 # p=1.000 r=0.796 f0.5=0.951 570+741 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
29
|
+
peau -> pot; 10 # p=0.996 r=0.658 f0.5=0.903 486+392 2020-03-17 f0.1=0.99 FA/10k=0.001
|
|
30
|
+
pot -> peau; 100000 # p=1.000 r=0.831 f0.5=0.961 392+486 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
31
|
+
#pris -> prix; 10000 # p=0.959 r=0.338 f0.5=0.701 350+139 2020-03-17 f0.1=0.94 FA/10k=0.152 (commented due to precision < 0.99)
|
|
32
|
+
prix -> pris; 10 # p=1.000 r=0.880 f0.5=0.973 139+350 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
33
|
+
#quand -> quant; 10 # p=1.000 r=0.937 f0.5=0.987 303+474 2020-03-17 f0.1=1.00 FA/10k=0.000 commented out due to false positive
|
|
34
|
+
#quant -> quand; 100000 # p=0.988 r=0.785 f0.5=0.939 474+303 2020-03-17 f0.1=0.99 FA/10k=0.038 (commented due to precision < 0.99)
|
|
35
|
+
#sais -> sait; 10 # p=0.998 r=0.773 f0.5=0.943 359+797 2020-03-17 f0.1=1.00 FA/10k=0.003 commented due to false positives
|
|
36
|
+
#sait -> sais; 1000 # p=1.000 r=0.471 f0.5=0.816 797+359 2020-03-17 f0.1=0.99 FA/10k=0.000 commented due to false positives
|
|
37
|
+
tante -> tente; 1000 # p=0.997 r=0.758 f0.5=0.938 352+890 2020-03-17 f0.1=0.99 FA/10k=0.001
|
|
38
|
+
tente -> tante; 1000 # p=1.000 r=0.210 f0.5=0.571 890+352 2020-03-17 f0.1=0.96 FA/10k=0.000
|
|
39
|
+
toi -> toit; 10 # p=1.000 r=0.826 f0.5=0.959 497+728 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
40
|
+
#toit -> toi; 1000000 # p=0.985 r=0.400 f0.5=0.762 728+497 2020-03-17 f0.1=0.97 FA/10k=0.008 (commented due to precision < 0.99)
|
|
41
|
+
#trait -> très; 100000 # p=0.992 r=0.851 f0.5=0.960 567+596 2020-03-17 f0.1=0.99 FA/10k=0.061 (commented due to estimated false alarms rate > 0.025)
|
|
42
|
+
très -> trait; 10000 # p=1.000 r=0.519 f0.5=0.843 596+567 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
43
|
+
vain -> vin; 1000 # p=0.990 r=0.785 f0.5=0.941 810+506 2020-03-17 f0.1=0.99 FA/10k=0.005
|
|
44
|
+
vin -> vain; 100 # p=0.999 r=0.951 f0.5=0.989 506+810 2020-03-17 f0.1=1.00 FA/10k=0.001
|
|
45
|
+
vain -> vingt; 100000 # p=1.000 r=0.429 f0.5=0.790 810+445 2020-03-17 f0.1=0.99 FA/10k=0.000
|
|
46
|
+
vingt -> vain; 10 # p=1.000 r=0.969 f0.5=0.994 445+810 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
47
|
+
vin -> vingt; 10000 # p=0.995 r=0.432 f0.5=0.789 503+444 2020-03-17 f0.1=0.98 FA/10k=0.006
|
|
48
|
+
vingt -> vin; 100 # p=0.995 r=0.805 f0.5=0.950 444+503 2020-03-17 f0.1=0.99 FA/10k=0.006
|
|
49
|
+
#ver -> verre; 10000 # p=0.968 r=0.602 f0.5=0.863 439+708 2020-03-17 f0.1=0.96 FA/10k=0.009 (commented due to precision < 0.99)
|
|
50
|
+
verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
51
|
+
#ver -> vers; 1000000 # p=0.979 r=0.694 f0.5=0.905 438+481 2020-03-17 f0.1=0.98 FA/10k=0.080 (commented due to precision < 0.99)
|
|
52
|
+
#ver -> vert; 1000 # p=0.971 r=0.556 f0.5=0.845 438+666 2020-03-17 f0.1=0.96 FA/10k=0.007 (commented due to precision < 0.99)
|
|
53
|
+
#vert -> verre; 100 # p=0.991 r=0.745 f0.5=0.929 664+707 2020-03-17 f0.1=0.99 FA/10k=0.004
|
|
54
|
+
vert -> verre; 10000; # p=0.993, r=0.420, f0.5=0.780, 664+707, 3grams, 2020-03-17
|
|
55
|
+
verre -> vers; 1000000 # p=1.000 r=0.705 f0.5=0.923 706+474 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
56
|
+
#vers -> verre; 100 # p=1.000 r=0.677 f0.5=0.913 474+706 2020-03-17 f0.1=1.00 FA/10k=0.000
|
|
57
|
+
vers -> verre; 10000; # p=1.000, r=0.380, f0.5=0.754, 474+706, 3grams, 2020-03-17
|
|
58
|
+
#vers -> vert; 100 # p=1.000 r=0.306 f0.5=0.688 474+661 2020-03-17 f0.1=0.98 FA/10k=0.000
|
|
59
|
+
#vert -> vers; 10000000 # p=0.991 r=0.667 f0.5=0.903 661+474 2020-03-17 f0.1=0.99 FA/10k=0.036 (commented due to estimated false alarms rate > 0.025)
|
|
60
|
+
verre -> vert; 100 # p=0.990 r=0.574 f0.5=0.864 707+664 2020-03-17 f0.1=0.98 FA/10k=0.004
|
|
61
|
+
#votre -> vôtre; 10 # p=0.992 r=0.939 f0.5=0.981 325+872 2020-03-17 f0.1=0.99 FA/10k=0.010 commented due to false positives
|
|
62
|
+
|
|
63
|
+
# Commented out: precision may still not be high enough for these pairs:
|
|
64
|
+
#an; en; 1000000 # p=0.997, r=0.803, 1000+529, 3grams, 2016-03-29
|
|
65
|
+
#à; a; 10000000 # p=0.999, r=0.768, 1000+1000, 3grams, 2016-03-29
|
|
66
|
+
#ces; ses; 100000 # p=0.986, r=0.218, 909+990, 3grams, 2016-03-29
|
|
67
|
+
#cor; corps; 10000000 # p=0.997, r=0.790, 21+437, 3grams, 2016-03-30
|
|
68
|
+
#cours; court; 10000000 # p=0.995, r=0.630, 866+301, 3grams, 2016-03-30
|
|
69
|
+
#maire; mer; 10000000 # p=0.998, r=0.692, 212+528, 3grams, 2016-03-30
|
|
70
|
+
#maire; mère; 10000000 # p=0.998, r=0.694, 212+994, 3grams, 2016-03-30
|
|
71
|
+
#mer; mère; 10000000 # p=0.999, r=0.518, 528+992, 3grams, 2016-03-30
|
|
72
|
+
#par; part; 10000000 # p=0.997, r=0.798, 978+843, 3grams, 2016-03-30
|
|
73
|
+
#parti; partie; 10000000 # p=0.999, r=0.694, 995+983, 3grams, 2016-03-30
|
|
74
|
+
#saint;sen; 10000000 # p=0.998, r=0.333, 997+249, 3grams, 2016-03-30
|
|
75
|
+
#sur; sûr; 10000000 # p=0.999, r=0.741, 999+469, 3grams, 2016-03-30
|
|
76
|
+
#tant; temps; 10000000 # p=0.999, r=0.587, 587+1000, 3grams, 2016-03-30
|