fasttext 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -1
- data/LICENSE.txt +18 -18
- data/README.md +39 -12
- data/ext/fasttext/ext.cpp +108 -101
- data/ext/fasttext/extconf.rb +7 -9
- data/lib/fasttext.rb +3 -0
- data/lib/fasttext/classifier.rb +25 -7
- data/lib/fasttext/vectorizer.rb +7 -2
- data/lib/fasttext/version.rb +1 -1
- data/vendor/fastText/README.md +3 -3
- data/vendor/fastText/src/args.cc +179 -6
- data/vendor/fastText/src/args.h +29 -1
- data/vendor/fastText/src/autotune.cc +477 -0
- data/vendor/fastText/src/autotune.h +89 -0
- data/vendor/fastText/src/densematrix.cc +27 -7
- data/vendor/fastText/src/densematrix.h +10 -2
- data/vendor/fastText/src/fasttext.cc +125 -114
- data/vendor/fastText/src/fasttext.h +31 -52
- data/vendor/fastText/src/main.cc +32 -13
- data/vendor/fastText/src/meter.cc +148 -2
- data/vendor/fastText/src/meter.h +24 -2
- data/vendor/fastText/src/model.cc +0 -1
- data/vendor/fastText/src/real.h +0 -1
- data/vendor/fastText/src/utils.cc +25 -0
- data/vendor/fastText/src/utils.h +29 -0
- data/vendor/fastText/src/vector.cc +0 -1
- metadata +16 -539
- data/lib/fasttext/ext.bundle +0 -0
- data/vendor/fastText/CMakeLists.txt +0 -68
- data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
- data/vendor/fastText/CONTRIBUTING.md +0 -32
- data/vendor/fastText/MANIFEST.in +0 -5
- data/vendor/fastText/Makefile +0 -63
- data/vendor/fastText/alignment/README.md +0 -53
- data/vendor/fastText/alignment/align.py +0 -145
- data/vendor/fastText/alignment/eval.py +0 -60
- data/vendor/fastText/alignment/example.sh +0 -51
- data/vendor/fastText/alignment/unsup_align.py +0 -109
- data/vendor/fastText/alignment/utils.py +0 -154
- data/vendor/fastText/classification-example.sh +0 -41
- data/vendor/fastText/classification-results.sh +0 -94
- data/vendor/fastText/crawl/README.md +0 -26
- data/vendor/fastText/crawl/dedup.cc +0 -51
- data/vendor/fastText/crawl/download_crawl.sh +0 -57
- data/vendor/fastText/crawl/filter_dedup.sh +0 -13
- data/vendor/fastText/crawl/filter_utf8.cc +0 -105
- data/vendor/fastText/crawl/process_wet_file.sh +0 -30
- data/vendor/fastText/docs/aligned-vectors.md +0 -64
- data/vendor/fastText/docs/api.md +0 -6
- data/vendor/fastText/docs/cheatsheet.md +0 -66
- data/vendor/fastText/docs/crawl-vectors.md +0 -125
- data/vendor/fastText/docs/dataset.md +0 -6
- data/vendor/fastText/docs/english-vectors.md +0 -53
- data/vendor/fastText/docs/faqs.md +0 -63
- data/vendor/fastText/docs/language-identification.md +0 -47
- data/vendor/fastText/docs/options.md +0 -50
- data/vendor/fastText/docs/pretrained-vectors.md +0 -142
- data/vendor/fastText/docs/python-module.md +0 -314
- data/vendor/fastText/docs/references.md +0 -41
- data/vendor/fastText/docs/supervised-models.md +0 -54
- data/vendor/fastText/docs/supervised-tutorial.md +0 -349
- data/vendor/fastText/docs/support.md +0 -58
- data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
- data/vendor/fastText/eval.py +0 -95
- data/vendor/fastText/get-wikimedia.sh +0 -79
- data/vendor/fastText/python/README.md +0 -322
- data/vendor/fastText/python/README.rst +0 -406
- data/vendor/fastText/python/benchmarks/README.rst +0 -3
- data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
- data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
- data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
- data/vendor/fastText/quantization-example.sh +0 -40
- data/vendor/fastText/runtests.py +0 -60
- data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
- data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
- data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
- data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
- data/vendor/fastText/setup.cfg +0 -2
- data/vendor/fastText/setup.py +0 -203
- data/vendor/fastText/tests/fetch_test_data.sh +0 -202
- data/vendor/fastText/website/README.md +0 -6
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
- data/vendor/fastText/website/core/Footer.js +0 -127
- data/vendor/fastText/website/package.json +0 -12
- data/vendor/fastText/website/pages/en/index.js +0 -286
- data/vendor/fastText/website/sidebars.json +0 -18
- data/vendor/fastText/website/siteConfig.js +0 -102
- data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
- data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
- data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
- data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
- data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
- data/vendor/fastText/website/static/fasttext.css +0 -48
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +0 -57
- data/vendor/fastText/word-vector-example.sh +0 -39
|
@@ -0,0 +1,477 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
|
3
|
+
* All rights reserved.
|
|
4
|
+
*
|
|
5
|
+
* This source code is licensed under the MIT license found in the
|
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
|
7
|
+
*/
|
|
8
|
+
|
|
9
|
+
#include "autotune.h"
|
|
10
|
+
|
|
11
|
+
#include <algorithm>
|
|
12
|
+
#include <csignal>
|
|
13
|
+
#include <functional>
|
|
14
|
+
#include <iomanip>
|
|
15
|
+
#include <iostream>
|
|
16
|
+
#include <random>
|
|
17
|
+
#include <thread>
|
|
18
|
+
|
|
19
|
+
// Prints "<name> = <val>" on stdout when autotuneArgs.verbose is above 2.
// A variable named `autotuneArgs` with a `verbose` member must be in scope at
// the call site. The expansion is a complete if-statement, so call sites may
// omit the trailing semicolon.
#define LOG_VAL(name, val)                        \
  if (autotuneArgs.verbose > 2) {                 \
    std::cout << #name " = " << val << std::endl; \
  }
// Same contract as LOG_VAL, except that a NaN value is printed as the literal
// text "NaN" instead of being streamed.
#define LOG_VAL_NAN(name, val)    \
  if (autotuneArgs.verbose > 2) { \
    std::cout << #name " = ";     \
    if (std::isnan(val)) {        \
      std::cout << "NaN";         \
    } else {                      \
      std::cout << val;           \
    }                             \
    std::cout << std::endl;       \
  }
|
|
31
|
+
|
|
32
|
+
namespace {
|
|
33
|
+
|
|
34
|
+
// Callback executed when SIGINT is delivered. It is empty until
// Autotune::startTimer() assigns one.
std::function<void()> interruptSignalHandler;

// Handler registered with std::signal. Forwards SIGINT to
// interruptSignalHandler and ignores every other signal.
void signalHandler(int signal) {
  // Invoking a std::function that has no target throws
  // std::bad_function_call. The handler is registered before the callback is
  // assigned, so a SIGINT arriving in that window must be ignored rather than
  // escape as an exception.
  if (signal == SIGINT && interruptSignalHandler) {
    interruptSignalHandler();
  }
}
|
|
41
|
+
|
|
42
|
+
class ElapsedTimeMarker {
|
|
43
|
+
std::chrono::steady_clock::time_point start_;
|
|
44
|
+
|
|
45
|
+
public:
|
|
46
|
+
ElapsedTimeMarker() {
|
|
47
|
+
start_ = std::chrono::steady_clock::now();
|
|
48
|
+
}
|
|
49
|
+
double getElapsed() {
|
|
50
|
+
return fasttext::utils::getDuration(
|
|
51
|
+
start_, std::chrono::steady_clock::now());
|
|
52
|
+
}
|
|
53
|
+
};
|
|
54
|
+
|
|
55
|
+
} // namespace
|
|
56
|
+
|
|
57
|
+
namespace fasttext {
|
|
58
|
+
|
|
59
|
+
// Sentinel held by Autotune::bestScore_ until a trial has produced a score.
constexpr double kUnknownBestScore{-1.0};
// Smallest cutoff getCutoffForFileSize() will return; quantize() treats a
// cutoff equal to this value as "size constraint cannot be met".
constexpr int kCutoffLimit{256};
|
|
61
|
+
|
|
62
|
+
// Returns a randomly perturbed copy of `val`.
//
// One sample is drawn from a zero-mean normal distribution. Its standard
// deviation equals `startSigma` while t <= 0.25, moves linearly towards
// `endSigma` for t in [0.25, 0.75] and stays at `endSigma` afterwards.
//
//   linear == true  : result = val + sample
//   linear == false : result = val * 2^sample
//
// The result is converted back to T with static_cast. Exactly one value is
// drawn from `rng` per call.
template <typename T>
T getArgGauss(
    T val,
    std::minstd_rand& rng,
    double startSigma,
    double endSigma,
    double t,
    bool linear) {
  // Portion of the [0.25, 0.75] window already covered, clamped to [0, 0.5].
  const double annealed = std::min(0.5, std::max((t - 0.25), 0.0));
  const double slope = (startSigma - endSigma) / 0.5;
  const double stddev = startSigma - slope * annealed;

  std::normal_distribution<double> gaussian(0.0, stddev);
  const double sample = gaussian(rng);

  if (linear) {
    return static_cast<T>(sample + val);
  }
  return static_cast<T>(std::pow(2.0, sample) * val);
}
|
|
90
|
+
|
|
91
|
+
// Perturbs `val` with getArgGauss() and limits the result to [min, max].
// The upper bound is applied first and the lower bound second, so `min` wins
// if the two bounds are ever inconsistent.
template <typename T>
T updateArgGauss(
    T val,
    T min,
    T max,
    double startSigma,
    double endSigma,
    double t,
    bool linear,
    std::minstd_rand& rng) {
  const T candidate = getArgGauss(val, rng, startSigma, endSigma, t, linear);
  const T capped = (candidate > max) ? max : candidate;
  return (capped < min) ? min : capped;
}
|
|
110
|
+
|
|
111
|
+
AutotuneStrategy::AutotuneStrategy(
|
|
112
|
+
const Args& originalArgs,
|
|
113
|
+
std::minstd_rand::result_type seed)
|
|
114
|
+
: bestArgs_(),
|
|
115
|
+
maxDuration_(originalArgs.autotuneDuration),
|
|
116
|
+
rng_(seed),
|
|
117
|
+
trials_(0),
|
|
118
|
+
bestMinnIndex_(0),
|
|
119
|
+
bestDsubExponent_(1),
|
|
120
|
+
bestNonzeroBucket_(2000000),
|
|
121
|
+
originalBucket_(originalArgs.bucket) {
|
|
122
|
+
minnChoices_ = {0, 2, 3};
|
|
123
|
+
updateBest(originalArgs);
|
|
124
|
+
}
|
|
125
|
+
|
|
126
|
+
// Proposes the arguments for the next trial.
//
// elapsed: time already spent; divided by the time budget and capped at 1 to
//          obtain the progress value `t` that drives the sigma schedule.
//
// The first call returns the current best arguments unchanged. Later calls
// start from the best arguments and perturb every parameter the user has not
// set manually. The order of the perturbations is significant because they
// all draw from the same random number generator.
Args AutotuneStrategy::ask(double elapsed) {
  const double t = std::min(1.0, elapsed / maxDuration_);

  if (++trials_ == 1) {
    return bestArgs_;
  }

  Args candidate = bestArgs_;

  if (!candidate.isManual("epoch")) {
    candidate.epoch =
        updateArgGauss(candidate.epoch, 1, 100, 2.8, 2.5, t, false, rng_);
  }
  if (!candidate.isManual("lr")) {
    candidate.lr =
        updateArgGauss(candidate.lr, 0.01, 5.0, 1.9, 1.0, t, false, rng_);
  }
  if (!candidate.isManual("dim")) {
    candidate.dim =
        updateArgGauss(candidate.dim, 1, 1000, 1.4, 0.3, t, false, rng_);
  }
  if (!candidate.isManual("wordNgrams")) {
    candidate.wordNgrams =
        updateArgGauss(candidate.wordNgrams, 1, 5, 4.3, 2.4, t, true, rng_);
  }
  if (!candidate.isManual("dsub")) {
    // dsub is searched through its exponent, so it is always a power of two.
    const int dsubExponent =
        updateArgGauss(bestDsubExponent_, 1, 4, 2.0, 1.0, t, true, rng_);
    candidate.dsub = (1 << dsubExponent);
  }
  if (!candidate.isManual("minn")) {
    // minn is searched as an index into the fixed list of allowed values.
    const int lastMinnIndex = static_cast<int>(minnChoices_.size() - 1);
    const int minnIndex = updateArgGauss(
        bestMinnIndex_, 0, lastMinnIndex, 4.0, 1.4, t, true, rng_);
    candidate.minn = minnChoices_[minnIndex];
  }
  if (!candidate.isManual("maxn")) {
    // maxn is derived from minn rather than sampled.
    candidate.maxn = (candidate.minn == 0) ? 0 : candidate.minn + 3;
  }
  if (candidate.isManual("bucket")) {
    candidate.bucket = originalBucket_;
  } else {
    candidate.bucket = updateArgGauss(
        bestNonzeroBucket_, 10000, 10000000, 2.0, 1.5, t, false, rng_);
  }
  // With neither word n-grams nor character n-grams the bucket count is
  // forced to zero, even when it was set manually.
  if (candidate.wordNgrams <= 1 && candidate.maxn == 0) {
    candidate.bucket = 0;
  }
  if (!candidate.isManual("loss")) {
    candidate.loss = loss_name::softmax;
  }

  return candidate;
}
|
|
189
|
+
|
|
190
|
+
// Returns the position of the first occurrence of `val` in `choices`, or 0
// when `val` is not present.
int AutotuneStrategy::getIndex(int val, const std::vector<int>& choices) {
  const auto position = std::find(choices.begin(), choices.end(), val);
  if (position == choices.end()) {
    return 0;
  }
  return static_cast<int>(std::distance(choices.begin(), position));
}
|
|
198
|
+
|
|
199
|
+
void AutotuneStrategy::updateBest(const Args& args) {
|
|
200
|
+
bestArgs_ = args;
|
|
201
|
+
bestMinnIndex_ = getIndex(args.minn, minnChoices_);
|
|
202
|
+
bestDsubExponent_ = log2(args.dsub);
|
|
203
|
+
if (args.bucket != 0) {
|
|
204
|
+
bestNonzeroBucket_ = args.bucket;
|
|
205
|
+
}
|
|
206
|
+
}
|
|
207
|
+
|
|
208
|
+
// Creates an autotuner that trains and evaluates through `fastText`.
// All counters start at zero, training is marked as not running, and no
// strategy or timer thread exists until train() is called.
Autotune::Autotune(const std::shared_ptr<FastText>& fastText)
    : fastText_(fastText),
      elapsed_(0.0),
      bestScore_(0.0),
      trials_(0),
      sizeConstraintFailed_(0),
      continueTraining_(false),
      strategy_(),
      timer_() {}
|
|
217
|
+
|
|
218
|
+
void Autotune::printInfo(double maxDuration) {
|
|
219
|
+
double progress = elapsed_ * 100 / maxDuration;
|
|
220
|
+
progress = std::min(progress, 100.0);
|
|
221
|
+
|
|
222
|
+
std::cerr << "\r";
|
|
223
|
+
std::cerr << std::fixed;
|
|
224
|
+
std::cerr << "Progress: ";
|
|
225
|
+
std::cerr << std::setprecision(1) << std::setw(5) << progress << "%";
|
|
226
|
+
std::cerr << " Trials: " << std::setw(4) << trials_;
|
|
227
|
+
std::cerr << " Best score: " << std::setw(9) << std::setprecision(6);
|
|
228
|
+
if (bestScore_ == kUnknownBestScore) {
|
|
229
|
+
std::cerr << "unknown";
|
|
230
|
+
} else {
|
|
231
|
+
std::cerr << bestScore_;
|
|
232
|
+
}
|
|
233
|
+
std::cerr << " ETA: "
|
|
234
|
+
<< utils::ClockPrint(std::max(maxDuration - elapsed_, 0.0));
|
|
235
|
+
std::cerr << std::flush;
|
|
236
|
+
}
|
|
237
|
+
|
|
238
|
+
void Autotune::timer(
|
|
239
|
+
const std::chrono::steady_clock::time_point& start,
|
|
240
|
+
double maxDuration) {
|
|
241
|
+
elapsed_ = 0.0;
|
|
242
|
+
while (keepTraining(maxDuration)) {
|
|
243
|
+
std::this_thread::sleep_for(std::chrono::milliseconds(500));
|
|
244
|
+
elapsed_ = utils::getDuration(start, std::chrono::steady_clock::now());
|
|
245
|
+
printInfo(maxDuration);
|
|
246
|
+
}
|
|
247
|
+
abort();
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
bool Autotune::keepTraining(double maxDuration) const {
|
|
251
|
+
return continueTraining_ && elapsed_ < maxDuration;
|
|
252
|
+
}
|
|
253
|
+
|
|
254
|
+
void Autotune::abort() {
|
|
255
|
+
if (continueTraining_) {
|
|
256
|
+
continueTraining_ = false;
|
|
257
|
+
fastText_->abort();
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
void Autotune::startTimer(const Args& args) {
|
|
262
|
+
std::chrono::steady_clock::time_point start =
|
|
263
|
+
std::chrono::steady_clock::now();
|
|
264
|
+
timer_ = std::thread([=]() { timer(start, args.autotuneDuration); });
|
|
265
|
+
bestScore_ = kUnknownBestScore;
|
|
266
|
+
trials_ = 0;
|
|
267
|
+
continueTraining_ = true;
|
|
268
|
+
|
|
269
|
+
auto previousSignalHandler = std::signal(SIGINT, signalHandler);
|
|
270
|
+
interruptSignalHandler = [&]() {
|
|
271
|
+
std::signal(SIGINT, previousSignalHandler);
|
|
272
|
+
std::cerr << std::endl << "Aborting autotune..." << std::endl;
|
|
273
|
+
abort();
|
|
274
|
+
};
|
|
275
|
+
}
|
|
276
|
+
|
|
277
|
+
// Reads the score selected by `metricName` from `meter`.
//
// metricValue: threshold forwarded to the precision-at-recall and
//              recall-at-precision metrics.
// metricLabel: when non-empty, resolved to a label id that is forwarded to
//              the per-label metric variants.
//
// Throws std::runtime_error when `metricLabel` is non-empty but unknown to the
// model, or when `metricName` matches none of the supported metrics. The label
// is validated before the metric is selected.
double Autotune::getMetricScore(
    Meter& meter,
    const metric_name& metricName,
    const double metricValue,
    const std::string& metricLabel) const {
  int32_t labelId = -1;
  if (!metricLabel.empty()) {
    labelId = fastText_->getLabelId(metricLabel);
    if (labelId == -1) {
      throw std::runtime_error("Unknown autotune metric label");
    }
  }

  if (metricName == metric_name::f1score) {
    return meter.f1Score();
  }
  if (metricName == metric_name::f1scoreLabel) {
    return meter.f1Score(labelId);
  }
  if (metricName == metric_name::precisionAtRecall) {
    return meter.precisionAtRecall(metricValue);
  }
  if (metricName == metric_name::precisionAtRecallLabel) {
    return meter.precisionAtRecall(labelId, metricValue);
  }
  if (metricName == metric_name::recallAtPrecision) {
    return meter.recallAtPrecision(metricValue);
  }
  if (metricName == metric_name::recallAtPrecisionLabel) {
    return meter.recallAtPrecision(labelId, metricValue);
  }
  throw std::runtime_error("Unknown metric");
}
|
|
307
|
+
|
|
308
|
+
void Autotune::printArgs(const Args& args, const Args& autotuneArgs) {
|
|
309
|
+
LOG_VAL(epoch, args.epoch)
|
|
310
|
+
LOG_VAL(lr, args.lr)
|
|
311
|
+
LOG_VAL(dim, args.dim)
|
|
312
|
+
LOG_VAL(minCount, args.minCount)
|
|
313
|
+
LOG_VAL(wordNgrams, args.wordNgrams)
|
|
314
|
+
LOG_VAL(minn, args.minn)
|
|
315
|
+
LOG_VAL(maxn, args.maxn)
|
|
316
|
+
LOG_VAL(bucket, args.bucket)
|
|
317
|
+
LOG_VAL(dsub, args.dsub)
|
|
318
|
+
LOG_VAL(loss, args.lossToString(args.loss))
|
|
319
|
+
}
|
|
320
|
+
|
|
321
|
+
int Autotune::getCutoffForFileSize(
|
|
322
|
+
bool qout,
|
|
323
|
+
bool qnorm,
|
|
324
|
+
int dsub,
|
|
325
|
+
int64_t fileSize) const {
|
|
326
|
+
int64_t outModelSize = 0;
|
|
327
|
+
const int64_t outM = fastText_->getOutputMatrix()->size(0);
|
|
328
|
+
const int64_t outN = fastText_->getOutputMatrix()->size(1);
|
|
329
|
+
if (qout) {
|
|
330
|
+
const int64_t outputPqSize = 16 + 4 * (outN * (1 << 8));
|
|
331
|
+
outModelSize =
|
|
332
|
+
21 + (outM * ((outN + 2 - 1) / 2)) + outputPqSize + (qnorm ? outM : 0);
|
|
333
|
+
} else {
|
|
334
|
+
outModelSize = 16 + 4 * (outM * outN);
|
|
335
|
+
}
|
|
336
|
+
const int64_t dim = fastText_->getInputMatrix()->size(1);
|
|
337
|
+
|
|
338
|
+
int target = (fileSize - (107) - 4 * (1 << 8) * dim - outModelSize);
|
|
339
|
+
int cutoff = target / ((dim + dsub - 1) / dsub + (qnorm ? 1 : 0) + 10);
|
|
340
|
+
|
|
341
|
+
return std::max(cutoff, kCutoffLimit);
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
// Quantizes the freshly trained model so that it fits the requested size.
//
// args:         arguments of the current trial; qnorm, qout, retrain and
//               cutoff are overwritten here before quantization.
// autotuneArgs: the user's arguments (size constraint and verbosity).
//
// Returns true when no size constraint is set (nothing is done) or when the
// model was quantized. Returns false, without quantizing, when the computed
// cutoff equals kCutoffLimit.
bool Autotune::quantize(Args& args, const Args& autotuneArgs) {
  const auto modelSize = autotuneArgs.getAutotuneModelSize();
  if (modelSize == Args::kUnlimitedModelSize) {
    return true;
  }

  const auto outputSize = fastText_->getOutputMatrix()->size(0);
  args.qnorm = true;
  // The output matrix is quantized only when it has at least kCutoffLimit rows.
  args.qout = (outputSize >= kCutoffLimit);
  args.retrain = true;
  args.cutoff =
      getCutoffForFileSize(args.qout, args.qnorm, args.dsub, modelSize);
  if (autotuneArgs.verbose > 2) {
    std::cout << "cutoff = " << args.cutoff << std::endl;
  }

  if (args.cutoff == kCutoffLimit) {
    return false;
  }
  fastText_->quantize(args);
  return true;
}
|
|
363
|
+
|
|
364
|
+
void Autotune::printSkippedArgs(const Args& autotuneArgs) {
|
|
365
|
+
std::unordered_set<std::string> argsToCheck = {"epoch",
|
|
366
|
+
"lr",
|
|
367
|
+
"dim",
|
|
368
|
+
"wordNgrams",
|
|
369
|
+
"loss",
|
|
370
|
+
"bucket",
|
|
371
|
+
"minn",
|
|
372
|
+
"maxn",
|
|
373
|
+
"dsub"};
|
|
374
|
+
for (const auto& arg : argsToCheck) {
|
|
375
|
+
if (autotuneArgs.isManual(arg)) {
|
|
376
|
+
std::cerr << "Warning : " << arg
|
|
377
|
+
<< " is manually set to a specific value. "
|
|
378
|
+
<< "It will not be automatically optimized." << std::endl;
|
|
379
|
+
}
|
|
380
|
+
}
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
void Autotune::train(const Args& autotuneArgs) {
|
|
384
|
+
std::ifstream validationFileStream(autotuneArgs.autotuneValidationFile);
|
|
385
|
+
if (!validationFileStream.is_open()) {
|
|
386
|
+
throw std::invalid_argument("Validation file cannot be opened!");
|
|
387
|
+
}
|
|
388
|
+
printSkippedArgs(autotuneArgs);
|
|
389
|
+
|
|
390
|
+
bool sizeConstraintWarning = false;
|
|
391
|
+
int verbose = autotuneArgs.verbose;
|
|
392
|
+
Args bestTrainArgs(autotuneArgs);
|
|
393
|
+
Args trainArgs(autotuneArgs);
|
|
394
|
+
trainArgs.verbose = 0;
|
|
395
|
+
strategy_ = std::unique_ptr<AutotuneStrategy>(
|
|
396
|
+
new AutotuneStrategy(trainArgs, autotuneArgs.seed));
|
|
397
|
+
startTimer(autotuneArgs);
|
|
398
|
+
|
|
399
|
+
while (keepTraining(autotuneArgs.autotuneDuration)) {
|
|
400
|
+
trials_++;
|
|
401
|
+
|
|
402
|
+
trainArgs = strategy_->ask(elapsed_);
|
|
403
|
+
LOG_VAL(Trial, trials_)
|
|
404
|
+
printArgs(trainArgs, autotuneArgs);
|
|
405
|
+
ElapsedTimeMarker elapsedTimeMarker;
|
|
406
|
+
double currentScore = std::numeric_limits<double>::quiet_NaN();
|
|
407
|
+
try {
|
|
408
|
+
fastText_->train(trainArgs);
|
|
409
|
+
bool sizeConstraintOK = quantize(trainArgs, autotuneArgs);
|
|
410
|
+
if (sizeConstraintOK) {
|
|
411
|
+
const auto& metricLabel = autotuneArgs.getAutotuneMetricLabel();
|
|
412
|
+
Meter meter(!metricLabel.empty());
|
|
413
|
+
fastText_->test(
|
|
414
|
+
validationFileStream, autotuneArgs.autotunePredictions, 0.0, meter);
|
|
415
|
+
|
|
416
|
+
currentScore = getMetricScore(
|
|
417
|
+
meter,
|
|
418
|
+
autotuneArgs.getAutotuneMetric(),
|
|
419
|
+
autotuneArgs.getAutotuneMetricValue(),
|
|
420
|
+
metricLabel);
|
|
421
|
+
|
|
422
|
+
if (bestScore_ == kUnknownBestScore || (currentScore > bestScore_)) {
|
|
423
|
+
bestTrainArgs = trainArgs;
|
|
424
|
+
bestScore_ = currentScore;
|
|
425
|
+
strategy_->updateBest(bestTrainArgs);
|
|
426
|
+
}
|
|
427
|
+
} else {
|
|
428
|
+
sizeConstraintFailed_++;
|
|
429
|
+
if (!sizeConstraintWarning && trials_ > 10 &&
|
|
430
|
+
sizeConstraintFailed_ > (trials_ / 2)) {
|
|
431
|
+
sizeConstraintWarning = true;
|
|
432
|
+
std::cerr << std::endl
|
|
433
|
+
<< "Warning : requested model size is probably too small. "
|
|
434
|
+
"You may want to increase `autotune-modelsize`."
|
|
435
|
+
<< std::endl;
|
|
436
|
+
}
|
|
437
|
+
}
|
|
438
|
+
} catch (DenseMatrix::EncounteredNaNError&) {
|
|
439
|
+
// ignore diverging loss and go on
|
|
440
|
+
} catch (std::bad_alloc&) {
|
|
441
|
+
// ignore parameter samples asking too much memory
|
|
442
|
+
} catch (TimeoutError&) {
|
|
443
|
+
break;
|
|
444
|
+
} catch (FastText::AbortError&) {
|
|
445
|
+
break;
|
|
446
|
+
}
|
|
447
|
+
LOG_VAL_NAN(currentScore, currentScore)
|
|
448
|
+
LOG_VAL(train took, elapsedTimeMarker.getElapsed())
|
|
449
|
+
}
|
|
450
|
+
if (timer_.joinable()) {
|
|
451
|
+
timer_.join();
|
|
452
|
+
}
|
|
453
|
+
|
|
454
|
+
if (bestScore_ == kUnknownBestScore) {
|
|
455
|
+
std::string errorMessage;
|
|
456
|
+
if (sizeConstraintWarning) {
|
|
457
|
+
errorMessage =
|
|
458
|
+
"Couldn't fulfil model size constraint: please increase "
|
|
459
|
+
"`autotune-modelsize`.";
|
|
460
|
+
} else {
|
|
461
|
+
errorMessage =
|
|
462
|
+
"Didn't have enough time to train once: please increase "
|
|
463
|
+
"`autotune-duration`.";
|
|
464
|
+
}
|
|
465
|
+
throw std::runtime_error(errorMessage);
|
|
466
|
+
} else {
|
|
467
|
+
std::cerr << std::endl;
|
|
468
|
+
std::cerr << "Training again with best arguments" << std::endl;
|
|
469
|
+
bestTrainArgs.verbose = verbose;
|
|
470
|
+
LOG_VAL(Best selected args, 0)
|
|
471
|
+
printArgs(bestTrainArgs, autotuneArgs);
|
|
472
|
+
fastText_->train(bestTrainArgs);
|
|
473
|
+
quantize(bestTrainArgs, autotuneArgs);
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
|
|
477
|
+
} // namespace fasttext
|