fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,191 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <time.h>
|
12
|
+
|
13
|
+
#include <atomic>
|
14
|
+
#include <chrono>
|
15
|
+
#include <iostream>
|
16
|
+
#include <memory>
|
17
|
+
#include <queue>
|
18
|
+
#include <set>
|
19
|
+
#include <tuple>
|
20
|
+
|
21
|
+
#include "args.h"
|
22
|
+
#include "densematrix.h"
|
23
|
+
#include "dictionary.h"
|
24
|
+
#include "matrix.h"
|
25
|
+
#include "meter.h"
|
26
|
+
#include "model.h"
|
27
|
+
#include "real.h"
|
28
|
+
#include "utils.h"
|
29
|
+
#include "vector.h"
|
30
|
+
|
31
|
+
namespace fasttext {
|
32
|
+
|
33
|
+
class FastText {
|
34
|
+
protected:
|
35
|
+
std::shared_ptr<Args> args_;
|
36
|
+
std::shared_ptr<Dictionary> dict_;
|
37
|
+
|
38
|
+
std::shared_ptr<Matrix> input_;
|
39
|
+
std::shared_ptr<Matrix> output_;
|
40
|
+
|
41
|
+
std::shared_ptr<Model> model_;
|
42
|
+
|
43
|
+
std::atomic<int64_t> tokenCount_{};
|
44
|
+
std::atomic<real> loss_{};
|
45
|
+
|
46
|
+
std::chrono::steady_clock::time_point start_;
|
47
|
+
void signModel(std::ostream&);
|
48
|
+
bool checkModel(std::istream&);
|
49
|
+
void startThreads();
|
50
|
+
void addInputVector(Vector&, int32_t) const;
|
51
|
+
void trainThread(int32_t);
|
52
|
+
std::vector<std::pair<real, std::string>> getNN(
|
53
|
+
const DenseMatrix& wordVectors,
|
54
|
+
const Vector& queryVec,
|
55
|
+
int32_t k,
|
56
|
+
const std::set<std::string>& banSet);
|
57
|
+
void lazyComputeWordVectors();
|
58
|
+
void printInfo(real, real, std::ostream&);
|
59
|
+
std::shared_ptr<Matrix> getInputMatrixFromFile(const std::string&) const;
|
60
|
+
std::shared_ptr<Matrix> createRandomMatrix() const;
|
61
|
+
std::shared_ptr<Matrix> createTrainOutputMatrix() const;
|
62
|
+
std::vector<int64_t> getTargetCounts() const;
|
63
|
+
std::shared_ptr<Loss> createLoss(std::shared_ptr<Matrix>& output);
|
64
|
+
void supervised(
|
65
|
+
Model::State& state,
|
66
|
+
real lr,
|
67
|
+
const std::vector<int32_t>& line,
|
68
|
+
const std::vector<int32_t>& labels);
|
69
|
+
void cbow(Model::State& state, real lr, const std::vector<int32_t>& line);
|
70
|
+
void skipgram(Model::State& state, real lr, const std::vector<int32_t>& line);
|
71
|
+
|
72
|
+
bool quant_;
|
73
|
+
int32_t version;
|
74
|
+
std::unique_ptr<DenseMatrix> wordVectors_;
|
75
|
+
|
76
|
+
public:
|
77
|
+
FastText();
|
78
|
+
|
79
|
+
int32_t getWordId(const std::string& word) const;
|
80
|
+
|
81
|
+
int32_t getSubwordId(const std::string& subword) const;
|
82
|
+
|
83
|
+
void getWordVector(Vector& vec, const std::string& word) const;
|
84
|
+
|
85
|
+
void getSubwordVector(Vector& vec, const std::string& subword) const;
|
86
|
+
|
87
|
+
inline void getInputVector(Vector& vec, int32_t ind) {
|
88
|
+
vec.zero();
|
89
|
+
addInputVector(vec, ind);
|
90
|
+
}
|
91
|
+
|
92
|
+
const Args getArgs() const;
|
93
|
+
|
94
|
+
std::shared_ptr<const Dictionary> getDictionary() const;
|
95
|
+
|
96
|
+
std::shared_ptr<const DenseMatrix> getInputMatrix() const;
|
97
|
+
|
98
|
+
std::shared_ptr<const DenseMatrix> getOutputMatrix() const;
|
99
|
+
|
100
|
+
void saveVectors(const std::string& filename);
|
101
|
+
|
102
|
+
void saveModel(const std::string& filename);
|
103
|
+
|
104
|
+
void saveOutput(const std::string& filename);
|
105
|
+
|
106
|
+
void loadModel(std::istream& in);
|
107
|
+
|
108
|
+
void loadModel(const std::string& filename);
|
109
|
+
|
110
|
+
void getSentenceVector(std::istream& in, Vector& vec);
|
111
|
+
|
112
|
+
void quantize(const Args& qargs);
|
113
|
+
|
114
|
+
std::tuple<int64_t, double, double>
|
115
|
+
test(std::istream& in, int32_t k, real threshold = 0.0);
|
116
|
+
|
117
|
+
void test(std::istream& in, int32_t k, real threshold, Meter& meter) const;
|
118
|
+
|
119
|
+
void predict(
|
120
|
+
int32_t k,
|
121
|
+
const std::vector<int32_t>& words,
|
122
|
+
Predictions& predictions,
|
123
|
+
real threshold = 0.0) const;
|
124
|
+
|
125
|
+
bool predictLine(
|
126
|
+
std::istream& in,
|
127
|
+
std::vector<std::pair<real, std::string>>& predictions,
|
128
|
+
int32_t k,
|
129
|
+
real threshold) const;
|
130
|
+
|
131
|
+
std::vector<std::pair<std::string, Vector>> getNgramVectors(
|
132
|
+
const std::string& word) const;
|
133
|
+
|
134
|
+
std::vector<std::pair<real, std::string>> getNN(
|
135
|
+
const std::string& word,
|
136
|
+
int32_t k);
|
137
|
+
|
138
|
+
std::vector<std::pair<real, std::string>> getAnalogies(
|
139
|
+
int32_t k,
|
140
|
+
const std::string& wordA,
|
141
|
+
const std::string& wordB,
|
142
|
+
const std::string& wordC);
|
143
|
+
|
144
|
+
void train(const Args& args);
|
145
|
+
|
146
|
+
int getDimension() const;
|
147
|
+
|
148
|
+
bool isQuant() const;
|
149
|
+
|
150
|
+
FASTTEXT_DEPRECATED("loadVectors is being deprecated.")
|
151
|
+
void loadVectors(const std::string& filename);
|
152
|
+
|
153
|
+
FASTTEXT_DEPRECATED(
|
154
|
+
"getVector is being deprecated and replaced by getWordVector.")
|
155
|
+
void getVector(Vector& vec, const std::string& word) const;
|
156
|
+
|
157
|
+
FASTTEXT_DEPRECATED(
|
158
|
+
"ngramVectors is being deprecated and replaced by getNgramVectors.")
|
159
|
+
void ngramVectors(std::string word);
|
160
|
+
|
161
|
+
FASTTEXT_DEPRECATED(
|
162
|
+
"analogies is being deprecated and replaced by getAnalogies.")
|
163
|
+
void analogies(int32_t k);
|
164
|
+
|
165
|
+
FASTTEXT_DEPRECATED("selectEmbeddings is being deprecated.")
|
166
|
+
std::vector<int32_t> selectEmbeddings(int32_t cutoff) const;
|
167
|
+
|
168
|
+
FASTTEXT_DEPRECATED(
|
169
|
+
"saveVectors is being deprecated, please use the other signature.")
|
170
|
+
void saveVectors();
|
171
|
+
|
172
|
+
FASTTEXT_DEPRECATED(
|
173
|
+
"saveOutput is being deprecated, please use the other signature.")
|
174
|
+
void saveOutput();
|
175
|
+
|
176
|
+
FASTTEXT_DEPRECATED(
|
177
|
+
"saveModel is being deprecated, please use the other signature.")
|
178
|
+
void saveModel();
|
179
|
+
|
180
|
+
FASTTEXT_DEPRECATED("precomputeWordVectors is being deprecated.")
|
181
|
+
void precomputeWordVectors(DenseMatrix& wordVectors);
|
182
|
+
|
183
|
+
FASTTEXT_DEPRECATED("findNN is being deprecated and replaced by getNN.")
|
184
|
+
void findNN(
|
185
|
+
const DenseMatrix& wordVectors,
|
186
|
+
const Vector& query,
|
187
|
+
int32_t k,
|
188
|
+
const std::set<std::string>& banSet,
|
189
|
+
std::vector<std::pair<real, std::string>>& results);
|
190
|
+
};
|
191
|
+
} // namespace fasttext
|
@@ -0,0 +1,346 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "loss.h"
|
10
|
+
#include "utils.h"
|
11
|
+
|
12
|
+
#include <cmath>
|
13
|
+
|
14
|
+
namespace fasttext {
|
15
|
+
|
16
|
+
// Resolution of the precomputed sigmoid lookup table.
constexpr int64_t SIGMOID_TABLE_SIZE{512};
// The sigmoid lookup table spans the input range [-MAX_SIGMOID, MAX_SIGMOID].
constexpr int64_t MAX_SIGMOID{8};
// Resolution of the precomputed logarithm lookup table.
constexpr int64_t LOG_TABLE_SIZE{512};
|
19
|
+
|
20
|
+
bool comparePairs(
|
21
|
+
const std::pair<real, int32_t>& l,
|
22
|
+
const std::pair<real, int32_t>& r) {
|
23
|
+
return l.first > r.first;
|
24
|
+
}
|
25
|
+
|
26
|
+
// Natural logarithm of x shifted by 1e-5, so that x == 0 still yields a
// finite value.
real std_log(real x) {
  const auto shifted = x + 1e-5;
  return std::log(shifted);
}
|
29
|
+
|
30
|
+
// Stores the output matrix handle and fills the two lookup tables:
//  - t_sigmoid_: SIGMOID_TABLE_SIZE + 1 samples of the logistic function,
//    evenly spaced over [-MAX_SIGMOID, MAX_SIGMOID];
//  - t_log_: LOG_TABLE_SIZE + 1 samples of log((i + 1e-5) / LOG_TABLE_SIZE).
Loss::Loss(std::shared_ptr<Matrix>& wo) : wo_(wo) {
  t_sigmoid_.reserve(SIGMOID_TABLE_SIZE + 1);
  for (int step = 0; step <= SIGMOID_TABLE_SIZE; ++step) {
    const real x =
        real(step * 2 * MAX_SIGMOID) / SIGMOID_TABLE_SIZE - MAX_SIGMOID;
    t_sigmoid_.push_back(1.0 / (1.0 + std::exp(-x)));
  }

  t_log_.reserve(LOG_TABLE_SIZE + 1);
  for (int step = 0; step <= LOG_TABLE_SIZE; ++step) {
    // The 1e-5 offset keeps the first sample away from log(0).
    const real x = (real(step) + 1e-5) / LOG_TABLE_SIZE;
    t_log_.push_back(std::log(x));
  }
}
|
43
|
+
|
44
|
+
// Table-driven logarithm. Inputs above 1 are treated as 1 (result 0); all
// other inputs are truncated to a slot of t_log_. The caller is responsible
// for passing a non-negative value: no lower-bound check is performed.
real Loss::log(real x) const {
  const bool aboveOne = x > 1.0;
  if (aboveOne) {
    return 0.0;
  }
  const int64_t slot = int64_t(x * LOG_TABLE_SIZE);
  return t_log_[slot];
}
|
51
|
+
|
52
|
+
// Table-driven logistic function. Saturates to 0 below -MAX_SIGMOID and to 1
// above MAX_SIGMOID; in between, the input is mapped to a slot of t_sigmoid_.
real Loss::sigmoid(real x) const {
  if (x < -MAX_SIGMOID) {
    return 0.0;
  }
  if (x > MAX_SIGMOID) {
    return 1.0;
  }
  const int64_t slot =
      int64_t((x + MAX_SIGMOID) * SIGMOID_TABLE_SIZE / MAX_SIGMOID / 2);
  return t_sigmoid_[slot];
}
|
63
|
+
|
64
|
+
void Loss::predict(
|
65
|
+
int32_t k,
|
66
|
+
real threshold,
|
67
|
+
Predictions& heap,
|
68
|
+
Model::State& state) const {
|
69
|
+
computeOutput(state);
|
70
|
+
findKBest(k, threshold, heap, state.output);
|
71
|
+
std::sort_heap(heap.begin(), heap.end(), comparePairs);
|
72
|
+
}
|
73
|
+
|
74
|
+
void Loss::findKBest(
|
75
|
+
int32_t k,
|
76
|
+
real threshold,
|
77
|
+
Predictions& heap,
|
78
|
+
const Vector& output) const {
|
79
|
+
for (int32_t i = 0; i < output.size(); i++) {
|
80
|
+
if (output[i] < threshold) {
|
81
|
+
continue;
|
82
|
+
}
|
83
|
+
if (heap.size() == k && std_log(output[i]) < heap.front().first) {
|
84
|
+
continue;
|
85
|
+
}
|
86
|
+
heap.push_back(std::make_pair(std_log(output[i]), i));
|
87
|
+
std::push_heap(heap.begin(), heap.end(), comparePairs);
|
88
|
+
if (heap.size() > k) {
|
89
|
+
std::pop_heap(heap.begin(), heap.end(), comparePairs);
|
90
|
+
heap.pop_back();
|
91
|
+
}
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
// Forwards the output matrix handle to the Loss base; no extra state.
BinaryLogisticLoss::BinaryLogisticLoss(std::shared_ptr<Matrix>& wo)
    : Loss(wo) {
}
|
97
|
+
|
98
|
+
// One binary logistic step for output row `target`.
// Scores the hidden vector against the row, optionally applies the update
// (accumulating into state.grad and modifying the row in place), and returns
// the negative log-likelihood of the given label.
real BinaryLogisticLoss::binaryLogistic(
    int32_t target,
    Model::State& state,
    bool labelIsPositive,
    real lr,
    bool backprop) const {
  const real score = sigmoid(wo_->dotRow(state.hidden, target));

  if (backprop) {
    const real alpha = lr * (real(labelIsPositive) - score);
    // The gradient is accumulated from the row before the row is updated.
    state.grad.addRow(*wo_, target, alpha);
    wo_->addVectorToRow(state.hidden, target, alpha);
  }

  return labelIsPositive ? -log(score) : -log(1.0 - score);
}
|
116
|
+
|
117
|
+
// Fills state.output with the output matrix applied to state.hidden, then
// replaces every entry by its sigmoid.
void BinaryLogisticLoss::computeOutput(Model::State& state) const {
  Vector& scores = state.output;
  scores.mul(*wo_, state.hidden);
  const int32_t count = scores.size();
  for (int32_t pos = 0; pos < count; ++pos) {
    scores[pos] = sigmoid(scores[pos]);
  }
}
|
125
|
+
|
126
|
+
// Forwards the output matrix handle to BinaryLogisticLoss; no extra state.
OneVsAllLoss::OneVsAllLoss(std::shared_ptr<Matrix>& wo)
    : BinaryLogisticLoss(wo) {
}
|
128
|
+
|
129
|
+
// Runs one binary logistic step per output row. A row is treated as a
// positive example when its index appears in `targets`, negative otherwise.
// The target-index argument is unused because all targets are considered.
// Returns the summed loss over all rows.
real OneVsAllLoss::forward(
    const std::vector<int32_t>& targets,
    int32_t /* we take all targets here */,
    Model::State& state,
    real lr,
    bool backprop) {
  real total = 0.0;
  const int32_t labelCount = state.output.size();
  for (int32_t label = 0; label < labelCount; ++label) {
    const bool isTarget = utils::contains(targets, label);
    total += binaryLogistic(label, state, isTarget, lr, backprop);
  }
  return total;
}
|
144
|
+
|
145
|
+
// Builds the negative-sampling table.
//
// Each target index i is repeated in negatives_ in proportion to
// sqrt(targetCounts[i]) / sum_j sqrt(targetCounts[j]), scaled by
// NEGATIVE_TABLE_SIZE. uniform_ is then set to draw a position in that table.
//
// wo           output matrix handle, forwarded to BinaryLogisticLoss.
// neg          number of negatives drawn per forward() call.
// targetCounts occurrence count for every target index.
//
// NOTE(review): when targetCounts is empty (or all zero) the table stays
// empty and `negatives_.size() - 1` wraps around; callers must supply at
// least one target with a positive count.
NegativeSamplingLoss::NegativeSamplingLoss(
    std::shared_ptr<Matrix>& wo,
    int neg,
    const std::vector<int64_t>& targetCounts)
    : BinaryLogisticLoss(wo), neg_(neg), negatives_(), uniform_() {
  // Normalization constant: sum of square roots of the counts.
  real z = 0.0;
  for (const int64_t count : targetCounts) {
    z += std::sqrt(static_cast<double>(count));
  }

  for (size_t i = 0; i < targetCounts.size(); i++) {
    const real c = std::sqrt(static_cast<double>(targetCounts[i]));
    // Number of table slots for this target; computed once instead of on
    // every inner-loop iteration.
    const auto slots = c * NegativeSamplingLoss::NEGATIVE_TABLE_SIZE / z;
    for (size_t j = 0; j < slots; j++) {
      negatives_.push_back(i);
    }
  }
  uniform_ = std::uniform_int_distribution<size_t>(0, negatives_.size() - 1);
}
|
163
|
+
|
164
|
+
// One negative-sampling step: a positive binary logistic update for
// targets[targetIndex], followed by neg_ negative updates on rows drawn with
// getNegative. Returns the summed loss of all updates.
real NegativeSamplingLoss::forward(
    const std::vector<int32_t>& targets,
    int32_t targetIndex,
    Model::State& state,
    real lr,
    bool backprop) {
  assert(targetIndex >= 0);
  assert(targetIndex < targets.size());
  const int32_t positive = targets[targetIndex];

  real total = binaryLogistic(positive, state, true, lr, backprop);
  for (int32_t draw = 0; draw < neg_; ++draw) {
    const auto sampled = getNegative(positive, state.rng);
    total += binaryLogistic(sampled, state, false, lr, backprop);
  }
  return total;
}
|
181
|
+
|
182
|
+
int32_t NegativeSamplingLoss::getNegative(
|
183
|
+
int32_t target,
|
184
|
+
std::minstd_rand& rng) {
|
185
|
+
int32_t negative;
|
186
|
+
do {
|
187
|
+
negative = negatives_[uniform_(rng)];
|
188
|
+
} while (target == negative);
|
189
|
+
return negative;
|
190
|
+
}
|
191
|
+
|
192
|
+
// Records the number of targets and builds the tree from their counts.
// paths_, codes_ and tree_ start out empty and are filled by buildTree.
HierarchicalSoftmaxLoss::HierarchicalSoftmaxLoss(
    std::shared_ptr<Matrix>& wo,
    const std::vector<int64_t>& targetCounts)
    : BinaryLogisticLoss(wo), osz_(targetCounts.size()) {
  buildTree(targetCounts);
}
|
202
|
+
|
203
|
+
void HierarchicalSoftmaxLoss::buildTree(const std::vector<int64_t>& counts) {
|
204
|
+
tree_.resize(2 * osz_ - 1);
|
205
|
+
for (int32_t i = 0; i < 2 * osz_ - 1; i++) {
|
206
|
+
tree_[i].parent = -1;
|
207
|
+
tree_[i].left = -1;
|
208
|
+
tree_[i].right = -1;
|
209
|
+
tree_[i].count = 1e15;
|
210
|
+
tree_[i].binary = false;
|
211
|
+
}
|
212
|
+
for (int32_t i = 0; i < osz_; i++) {
|
213
|
+
tree_[i].count = counts[i];
|
214
|
+
}
|
215
|
+
int32_t leaf = osz_ - 1;
|
216
|
+
int32_t node = osz_;
|
217
|
+
for (int32_t i = osz_; i < 2 * osz_ - 1; i++) {
|
218
|
+
int32_t mini[2] = {0};
|
219
|
+
for (int32_t j = 0; j < 2; j++) {
|
220
|
+
if (leaf >= 0 && tree_[leaf].count < tree_[node].count) {
|
221
|
+
mini[j] = leaf--;
|
222
|
+
} else {
|
223
|
+
mini[j] = node++;
|
224
|
+
}
|
225
|
+
}
|
226
|
+
tree_[i].left = mini[0];
|
227
|
+
tree_[i].right = mini[1];
|
228
|
+
tree_[i].count = tree_[mini[0]].count + tree_[mini[1]].count;
|
229
|
+
tree_[mini[0]].parent = i;
|
230
|
+
tree_[mini[1]].parent = i;
|
231
|
+
tree_[mini[1]].binary = true;
|
232
|
+
}
|
233
|
+
for (int32_t i = 0; i < osz_; i++) {
|
234
|
+
std::vector<int32_t> path;
|
235
|
+
std::vector<bool> code;
|
236
|
+
int32_t j = i;
|
237
|
+
while (tree_[j].parent != -1) {
|
238
|
+
path.push_back(tree_[j].parent - osz_);
|
239
|
+
code.push_back(tree_[j].binary);
|
240
|
+
j = tree_[j].parent;
|
241
|
+
}
|
242
|
+
paths_.push_back(path);
|
243
|
+
codes_.push_back(code);
|
244
|
+
}
|
245
|
+
}
|
246
|
+
|
247
|
+
// One hierarchical-softmax step for targets[targetIndex]: a binary logistic
// update is applied at every internal node on the target's path, using the
// target's code bit at that node as the label. Returns the summed loss.
//
// targetIndex must be a valid index into `targets`; this is checked with the
// same debug assertions the other forward() overrides in this file use.
real HierarchicalSoftmaxLoss::forward(
    const std::vector<int32_t>& targets,
    int32_t targetIndex,
    Model::State& state,
    real lr,
    bool backprop) {
  assert(targetIndex >= 0);
  assert(targetIndex < targets.size());
  const int32_t target = targets[targetIndex];

  const std::vector<bool>& binaryCode = codes_[target];
  const std::vector<int32_t>& pathToRoot = paths_[target];

  real loss = 0.0;
  // size_t index avoids a signed/unsigned comparison against size().
  for (size_t i = 0; i < pathToRoot.size(); i++) {
    loss += binaryLogistic(pathToRoot[i], state, binaryCode[i], lr, backprop);
  }
  return loss;
}
|
262
|
+
|
263
|
+
void HierarchicalSoftmaxLoss::predict(
|
264
|
+
int32_t k,
|
265
|
+
real threshold,
|
266
|
+
Predictions& heap,
|
267
|
+
Model::State& state) const {
|
268
|
+
dfs(k, threshold, 2 * osz_ - 2, 0.0, heap, state.hidden);
|
269
|
+
std::sort_heap(heap.begin(), heap.end(), comparePairs);
|
270
|
+
}
|
271
|
+
|
272
|
+
void HierarchicalSoftmaxLoss::dfs(
|
273
|
+
int32_t k,
|
274
|
+
real threshold,
|
275
|
+
int32_t node,
|
276
|
+
real score,
|
277
|
+
Predictions& heap,
|
278
|
+
const Vector& hidden) const {
|
279
|
+
if (score < std_log(threshold)) {
|
280
|
+
return;
|
281
|
+
}
|
282
|
+
if (heap.size() == k && score < heap.front().first) {
|
283
|
+
return;
|
284
|
+
}
|
285
|
+
|
286
|
+
if (tree_[node].left == -1 && tree_[node].right == -1) {
|
287
|
+
heap.push_back(std::make_pair(score, node));
|
288
|
+
std::push_heap(heap.begin(), heap.end(), comparePairs);
|
289
|
+
if (heap.size() > k) {
|
290
|
+
std::pop_heap(heap.begin(), heap.end(), comparePairs);
|
291
|
+
heap.pop_back();
|
292
|
+
}
|
293
|
+
return;
|
294
|
+
}
|
295
|
+
|
296
|
+
real f = wo_->dotRow(hidden, node - osz_);
|
297
|
+
f = 1. / (1 + std::exp(-f));
|
298
|
+
|
299
|
+
dfs(k, threshold, tree_[node].left, score + std_log(1.0 - f), heap, hidden);
|
300
|
+
dfs(k, threshold, tree_[node].right, score + std_log(f), heap, hidden);
|
301
|
+
}
|
302
|
+
|
303
|
+
// Forwards the output matrix handle to the Loss base; no extra state.
SoftmaxLoss::SoftmaxLoss(std::shared_ptr<Matrix>& wo)
    : Loss(wo) {
}
|
304
|
+
|
305
|
+
// Fills state.output with the softmax of (output matrix * state.hidden).
//
// The maximum score is subtracted before exponentiation so the largest
// exponent is exp(0) and cannot overflow. The output must contain at least
// one entry (the first entry seeds the maximum).
void SoftmaxLoss::computeOutput(Model::State& state) const {
  Vector& output = state.output;
  output.mul(*wo_, state.hidden);
  const int32_t osz = output.size();

  real max = output[0];
  for (int32_t i = 0; i < osz; i++) {
    max = std::max(output[i], max);
  }

  real z = 0.0;
  for (int32_t i = 0; i < osz; i++) {
    // std::exp (as used elsewhere in this file) rather than unqualified exp,
    // which <cmath> is not required to declare in the global namespace.
    output[i] = std::exp(output[i] - max);
    z += output[i];
  }

  for (int32_t i = 0; i < osz; i++) {
    output[i] /= z;
  }
}
|
321
|
+
|
322
|
+
// One full-softmax step for targets[targetIndex].
//
// Computes the softmax output, and when `backprop` is set applies the update
// to every output row: the gradient is accumulated into state.grad from the
// row before the row itself is modified. Returns the negative log of the
// probability assigned to the target.
real SoftmaxLoss::forward(
    const std::vector<int32_t>& targets,
    int32_t targetIndex,
    Model::State& state,
    real lr,
    bool backprop) {
  computeOutput(state);

  assert(targetIndex >= 0);
  assert(targetIndex < targets.size());
  int32_t target = targets[targetIndex];

  if (backprop) {
    int32_t osz = wo_->size(0);
    for (int32_t i = 0; i < osz; i++) {
      real label = (i == target) ? 1.0 : 0.0;
      real alpha = lr * (label - state.output[i]);
      state.grad.addRow(*wo_, i, alpha);
      wo_->addVectorToRow(state.hidden, i, alpha);
    }
  }
  return -log(state.output[target]);
}
|
345
|
+
|
346
|
+
} // namespace fasttext
|