fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,63 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <cstring>
|
12
|
+
#include <istream>
|
13
|
+
#include <ostream>
|
14
|
+
#include <random>
|
15
|
+
#include <vector>
|
16
|
+
|
17
|
+
#include "real.h"
|
18
|
+
#include "vector.h"
|
19
|
+
|
20
|
+
namespace fasttext {
|
21
|
+
|
22
|
+
class ProductQuantizer {
|
23
|
+
protected:
|
24
|
+
const int32_t nbits_ = 8;
|
25
|
+
const int32_t ksub_ = 1 << nbits_;
|
26
|
+
const int32_t max_points_per_cluster_ = 256;
|
27
|
+
const int32_t max_points_ = max_points_per_cluster_ * ksub_;
|
28
|
+
const int32_t seed_ = 1234;
|
29
|
+
const int32_t niter_ = 25;
|
30
|
+
const real eps_ = 1e-7;
|
31
|
+
|
32
|
+
int32_t dim_;
|
33
|
+
int32_t nsubq_;
|
34
|
+
int32_t dsub_;
|
35
|
+
int32_t lastdsub_;
|
36
|
+
|
37
|
+
std::vector<real> centroids_;
|
38
|
+
|
39
|
+
std::minstd_rand rng;
|
40
|
+
|
41
|
+
public:
|
42
|
+
ProductQuantizer() {}
|
43
|
+
ProductQuantizer(int32_t, int32_t);
|
44
|
+
|
45
|
+
real* get_centroids(int32_t, uint8_t);
|
46
|
+
const real* get_centroids(int32_t, uint8_t) const;
|
47
|
+
|
48
|
+
real assign_centroid(const real*, const real*, uint8_t*, int32_t) const;
|
49
|
+
void Estep(const real*, const real*, uint8_t*, int32_t, int32_t) const;
|
50
|
+
void MStep(const real*, real*, const uint8_t*, int32_t, int32_t);
|
51
|
+
void kmeans(const real*, real*, int32_t, int32_t);
|
52
|
+
void train(int, const real*);
|
53
|
+
|
54
|
+
real mulcode(const Vector&, const uint8_t*, int32_t, real) const;
|
55
|
+
void addcode(Vector&, const uint8_t*, int32_t, real) const;
|
56
|
+
void compute_code(const real*, uint8_t*) const;
|
57
|
+
void compute_codes(const real*, uint8_t*, int32_t) const;
|
58
|
+
|
59
|
+
void save(std::ostream&) const;
|
60
|
+
void load(std::istream&);
|
61
|
+
};
|
62
|
+
|
63
|
+
} // namespace fasttext
|
@@ -0,0 +1,117 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "quantmatrix.h"
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
#include <iostream>
|
13
|
+
#include <stdexcept>
|
14
|
+
|
15
|
+
namespace fasttext {
|
16
|
+
|
17
|
+
// Builds an empty quantized matrix: norm quantization off, no codes stored.
// QuantMatrix::load() is the way to populate such an instance.
QuantMatrix::QuantMatrix()
    : Matrix(),
      qnorm_(false),
      codesize_(0) {
}
|
18
|
+
|
19
|
+
// Number of code bytes needed for a `rows` x `cols` matrix when every row is
// cut into chunks of `dsub` columns (one uint8_t code per chunk, rounding the
// chunk count up).
// Throws std::invalid_argument when `dsub` is not strictly positive, which
// would otherwise be an integer division by zero.
static int32_t quantCodeSize(int64_t rows, int64_t cols, int32_t dsub) {
  if (dsub <= 0) {
    throw std::invalid_argument("dsub must be strictly positive.");
  }
  return static_cast<int32_t>(rows * ((cols + dsub - 1) / dsub));
}

// Quantizes `mat` into this matrix.
//   mat   - dense matrix to compress; taken by rvalue reference and handed to
//           quantize(), which may modify it in place.
//   dsub  - number of columns per quantized chunk; must be > 0.
//   qnorm - when true, row norms are quantized separately by `npq_`.
// Throws std::invalid_argument if `dsub` <= 0.
QuantMatrix::QuantMatrix(DenseMatrix&& mat, int32_t dsub, bool qnorm)
    : Matrix(mat.size(0), mat.size(1)),
      qnorm_(qnorm),
      codesize_(quantCodeSize(mat.size(0), mat.size(1), dsub)) {
  codes_.resize(codesize_);
  pq_ = std::unique_ptr<ProductQuantizer>(new ProductQuantizer(n_, dsub));
  if (qnorm_) {
    // One code per row for the norms, produced by a quantizer built as (1, 1).
    norm_codes_.resize(m_);
    npq_ = std::unique_ptr<ProductQuantizer>(new ProductQuantizer(1, 1));
  }
  // `mat` is a named rvalue reference (not a forwarding reference), so
  // std::move is the right cast here.
  quantize(std::move(mat));
}
|
31
|
+
|
32
|
+
void QuantMatrix::quantizeNorm(const Vector& norms) {
|
33
|
+
assert(qnorm_);
|
34
|
+
assert(norms.size() == m_);
|
35
|
+
auto dataptr = norms.data();
|
36
|
+
npq_->train(m_, dataptr);
|
37
|
+
npq_->compute_codes(dataptr, norm_codes_.data(), m_);
|
38
|
+
}
|
39
|
+
|
40
|
+
// Trains the row quantizer on `mat` and encodes all m_ rows into `codes_`.
// With norm quantization enabled, `mat` is first passed through
// l2NormRow/divideRow (presumably: compute per-row L2 norms, then divide each
// row by its norm -- see DenseMatrix) and the norms are quantized separately.
void QuantMatrix::quantize(DenseMatrix&& mat) {
  if (qnorm_) {
    Vector rowNorms(mat.size(0));
    mat.l2NormRow(rowNorms);
    mat.divideRow(rowNorms);
    quantizeNorm(rowNorms);
  }

  auto rowData = mat.data();
  pq_->train(m_, rowData);
  pq_->compute_codes(rowData, codes_.data(), m_);
}
|
51
|
+
|
52
|
+
// Returns the product of `vec` with encoded row `i`, as computed by
// ProductQuantizer::mulcode. The scale passed to mulcode is 1, or the decoded
// norm of row `i` when norm quantization is enabled.
// Preconditions (debug-checked): 0 <= i < m_ and vec.size() == n_.
real QuantMatrix::dotRow(const Vector& vec, int64_t i) const {
  assert(i >= 0);
  assert(i < m_);
  assert(vec.size() == n_);
  const real scale =
      qnorm_ ? npq_->get_centroids(0, norm_codes_[i])[0] : real(1);
  return pq_->mulcode(vec, codes_.data(), i, scale);
}
|
62
|
+
|
63
|
+
// Rows of a quantized matrix cannot be updated in place: always throws
// std::runtime_error. The arguments are ignored.
void QuantMatrix::addVectorToRow(
    const Vector& /*vec*/,
    int64_t /*i*/,
    real /*a*/) {
  throw std::runtime_error("Operation not permitted on quantized matrices.");
}
|
66
|
+
|
67
|
+
// Adds `a` times encoded row `i` into `x` via ProductQuantizer::addcode.
// When norm quantization is enabled the decoded norm of the row is folded
// into the scale factor.
// Preconditions (debug-checked, matching dotRow): 0 <= i < m_.
void QuantMatrix::addRowToVector(Vector& x, int32_t i, real a) const {
  // `i` indexes norm_codes_ and the code buffer, so reject bad rows early.
  assert(i >= 0);
  assert(i < m_);
  real norm = 1;
  if (qnorm_) {
    norm = npq_->get_centroids(0, norm_codes_[i])[0];
  }
  pq_->addcode(x, codes_.data(), i, a * norm);
}
|
74
|
+
|
75
|
+
// Adds encoded row `i` into `x` via ProductQuantizer::addcode. The scale is
// 1, or the decoded norm of the row when norm quantization is enabled.
// Preconditions (debug-checked, matching dotRow): 0 <= i < m_.
void QuantMatrix::addRowToVector(Vector& x, int32_t i) const {
  // `i` indexes norm_codes_ and the code buffer, so reject bad rows early.
  assert(i >= 0);
  assert(i < m_);
  real norm = 1;
  if (qnorm_) {
    norm = npq_->get_centroids(0, norm_codes_[i])[0];
  }
  pq_->addcode(x, codes_.data(), i, norm);
}
|
82
|
+
|
83
|
+
void QuantMatrix::save(std::ostream& out) const {
|
84
|
+
out.write((char*)&qnorm_, sizeof(qnorm_));
|
85
|
+
out.write((char*)&m_, sizeof(m_));
|
86
|
+
out.write((char*)&n_, sizeof(n_));
|
87
|
+
out.write((char*)&codesize_, sizeof(codesize_));
|
88
|
+
out.write((char*)codes_.data(), codesize_ * sizeof(uint8_t));
|
89
|
+
pq_->save(out);
|
90
|
+
if (qnorm_) {
|
91
|
+
out.write((char*)norm_codes_.data(), m_ * sizeof(uint8_t));
|
92
|
+
npq_->save(out);
|
93
|
+
}
|
94
|
+
}
|
95
|
+
|
96
|
+
void QuantMatrix::load(std::istream& in) {
|
97
|
+
in.read((char*)&qnorm_, sizeof(qnorm_));
|
98
|
+
in.read((char*)&m_, sizeof(m_));
|
99
|
+
in.read((char*)&n_, sizeof(n_));
|
100
|
+
in.read((char*)&codesize_, sizeof(codesize_));
|
101
|
+
codes_ = std::vector<uint8_t>(codesize_);
|
102
|
+
in.read((char*)codes_.data(), codesize_ * sizeof(uint8_t));
|
103
|
+
pq_ = std::unique_ptr<ProductQuantizer>(new ProductQuantizer());
|
104
|
+
pq_->load(in);
|
105
|
+
if (qnorm_) {
|
106
|
+
norm_codes_ = std::vector<uint8_t>(m_);
|
107
|
+
in.read((char*)norm_codes_.data(), m_ * sizeof(uint8_t));
|
108
|
+
npq_ = std::unique_ptr<ProductQuantizer>(new ProductQuantizer());
|
109
|
+
npq_->load(in);
|
110
|
+
}
|
111
|
+
}
|
112
|
+
|
113
|
+
// Dumping is not available for quantized matrices: always throws
// std::runtime_error. The stream argument is ignored.
void QuantMatrix::dump(std::ostream& /*out*/) const {
  throw std::runtime_error("Operation not permitted on quantized matrices.");
}
|
116
|
+
|
117
|
+
} // namespace fasttext
|
@@ -0,0 +1,60 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <cstdint>
|
12
|
+
#include <istream>
|
13
|
+
#include <ostream>
|
14
|
+
|
15
|
+
#include <memory>
|
16
|
+
#include <vector>
|
17
|
+
|
18
|
+
#include "real.h"
|
19
|
+
|
20
|
+
#include "densematrix.h"
|
21
|
+
#include "matrix.h"
|
22
|
+
#include "vector.h"
|
23
|
+
|
24
|
+
#include "productquantizer.h"
|
25
|
+
|
26
|
+
namespace fasttext {
|
27
|
+
|
28
|
+
class QuantMatrix : public Matrix {
|
29
|
+
protected:
|
30
|
+
std::unique_ptr<ProductQuantizer> pq_;
|
31
|
+
std::unique_ptr<ProductQuantizer> npq_;
|
32
|
+
|
33
|
+
std::vector<uint8_t> codes_;
|
34
|
+
std::vector<uint8_t> norm_codes_;
|
35
|
+
|
36
|
+
bool qnorm_;
|
37
|
+
int32_t codesize_;
|
38
|
+
|
39
|
+
public:
|
40
|
+
QuantMatrix();
|
41
|
+
QuantMatrix(DenseMatrix&&, int32_t, bool);
|
42
|
+
QuantMatrix(const QuantMatrix&) = delete;
|
43
|
+
QuantMatrix(QuantMatrix&&) = delete;
|
44
|
+
QuantMatrix& operator=(const QuantMatrix&) = delete;
|
45
|
+
QuantMatrix& operator=(QuantMatrix&&) = delete;
|
46
|
+
virtual ~QuantMatrix() noexcept override = default;
|
47
|
+
|
48
|
+
void quantizeNorm(const Vector&);
|
49
|
+
void quantize(DenseMatrix&& mat);
|
50
|
+
|
51
|
+
real dotRow(const Vector&, int64_t) const override;
|
52
|
+
void addVectorToRow(const Vector&, int64_t, real) override;
|
53
|
+
void addRowToVector(Vector& x, int32_t i) const override;
|
54
|
+
void addRowToVector(Vector& x, int32_t i, real a) const override;
|
55
|
+
void save(std::ostream&) const override;
|
56
|
+
void load(std::istream&) override;
|
57
|
+
void dump(std::ostream&) const override;
|
58
|
+
};
|
59
|
+
|
60
|
+
} // namespace fasttext
|
@@ -0,0 +1,15 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
namespace fasttext {
|
12
|
+
|
13
|
+
// Floating-point type used for all fastText numeric data.
using real = float;
|
14
|
+
|
15
|
+
}
|
@@ -0,0 +1,28 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "utils.h"
|
10
|
+
|
11
|
+
#include <ios>
|
12
|
+
|
13
|
+
namespace fasttext {
|
14
|
+
|
15
|
+
namespace utils {
|
16
|
+
|
17
|
+
// Returns the offset of the end of `ifs`, i.e. the size of the file.
// Side effect: the read position is left at the end of the stream.
int64_t size(std::ifstream& ifs) {
  ifs.seekg(std::streamoff(0), std::ios::end);
  const std::streamoff endOffset = ifs.tellg();
  return endOffset;
}
|
21
|
+
|
22
|
+
// Moves the read position of `ifs` to absolute offset `pos`.
// The stream's error flags are cleared first, so this also works on a stream
// that previously hit end-of-file.
void seek(std::ifstream& ifs, int64_t pos) {
  ifs.clear();
  const std::streampos target(pos);
  ifs.seekg(target);
}
|
26
|
+
} // namespace utils
|
27
|
+
|
28
|
+
} // namespace fasttext
|
@@ -0,0 +1,43 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include "real.h"
|
12
|
+
|
13
|
+
#include <algorithm>
|
14
|
+
#include <fstream>
|
15
|
+
#include <vector>
|
16
|
+
|
17
|
+
// FASTTEXT_DEPRECATED(msg): marks a declaration as deprecated with message
// `msg`, using the GCC/Clang attribute or the MSVC declspec. On any other
// compiler it expands to nothing.
#if defined(__clang__) || defined(__GNUC__)
#  define FASTTEXT_DEPRECATED(msg) __attribute__((__deprecated__(msg)))
#elif defined(_MSC_VER)
#  define FASTTEXT_DEPRECATED(msg) __declspec(deprecated(msg))
#else
#  define FASTTEXT_DEPRECATED(msg)
#endif
|
24
|
+
|
25
|
+
namespace fasttext {
|
26
|
+
|
27
|
+
// List of predictions, each a (real, int32_t) pair.
using Predictions =
    std::vector<std::pair<real, int32_t>>;
|
28
|
+
|
29
|
+
namespace utils {
|
30
|
+
|
31
|
+
int64_t size(std::ifstream&);
|
32
|
+
|
33
|
+
void seek(std::ifstream&, int64_t);
|
34
|
+
|
35
|
+
// Returns true when some element of `container` compares equal (operator==)
// to `value`; false otherwise, including for an empty container.
// Linear scan from front to back.
template <typename T>
bool contains(const std::vector<T>& container, const T& value) {
  for (const T& candidate : container) {
    if (candidate == value) {
      return true;
    }
  }
  return false;
}
|
40
|
+
|
41
|
+
} // namespace utils
|
42
|
+
|
43
|
+
} // namespace fasttext
|
@@ -0,0 +1,97 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include "vector.h"
|
10
|
+
|
11
|
+
#include <assert.h>
|
12
|
+
|
13
|
+
#include <cmath>
|
14
|
+
#include <iomanip>
|
15
|
+
#include <utility>
|
16
|
+
|
17
|
+
#include "matrix.h"
|
18
|
+
|
19
|
+
namespace fasttext {
|
20
|
+
|
21
|
+
// Creates a vector of `m` elements; the underlying std::vector
// value-initialises them, so every entry starts at zero.
Vector::Vector(int64_t m)
    : data_(m) {
}
|
22
|
+
|
23
|
+
void Vector::zero() {
|
24
|
+
std::fill(data_.begin(), data_.end(), 0.0);
|
25
|
+
}
|
26
|
+
|
27
|
+
// Returns the Euclidean (L2) norm: the square root of the sum of squared
// elements, accumulated in `real` precision from first to last element.
real Vector::norm() const {
  real sumOfSquares = 0;
  for (const real value : data_) {
    sumOfSquares += value * value;
  }
  return std::sqrt(sumOfSquares);
}
|
34
|
+
|
35
|
+
// Multiplies every element by the scalar `a`, in place.
void Vector::mul(real a) {
  for (real& entry : data_) {
    entry *= a;
  }
}
|
40
|
+
|
41
|
+
void Vector::addVector(const Vector& source) {
|
42
|
+
assert(size() == source.size());
|
43
|
+
for (int64_t i = 0; i < size(); i++) {
|
44
|
+
data_[i] += source.data_[i];
|
45
|
+
}
|
46
|
+
}
|
47
|
+
|
48
|
+
void Vector::addVector(const Vector& source, real s) {
|
49
|
+
assert(size() == source.size());
|
50
|
+
for (int64_t i = 0; i < size(); i++) {
|
51
|
+
data_[i] += s * source.data_[i];
|
52
|
+
}
|
53
|
+
}
|
54
|
+
|
55
|
+
void Vector::addRow(const Matrix& A, int64_t i, real a) {
|
56
|
+
assert(i >= 0);
|
57
|
+
assert(i < A.size(0));
|
58
|
+
assert(size() == A.size(1));
|
59
|
+
A.addRowToVector(*this, i, a);
|
60
|
+
}
|
61
|
+
|
62
|
+
void Vector::addRow(const Matrix& A, int64_t i) {
|
63
|
+
assert(i >= 0);
|
64
|
+
assert(i < A.size(0));
|
65
|
+
assert(size() == A.size(1));
|
66
|
+
A.addRowToVector(*this, i);
|
67
|
+
}
|
68
|
+
|
69
|
+
void Vector::mul(const Matrix& A, const Vector& vec) {
|
70
|
+
assert(A.size(0) == size());
|
71
|
+
assert(A.size(1) == vec.size());
|
72
|
+
for (int64_t i = 0; i < size(); i++) {
|
73
|
+
data_[i] = A.dotRow(vec, i);
|
74
|
+
}
|
75
|
+
}
|
76
|
+
|
77
|
+
int64_t Vector::argmax() {
|
78
|
+
real max = data_[0];
|
79
|
+
int64_t argmax = 0;
|
80
|
+
for (int64_t i = 1; i < size(); i++) {
|
81
|
+
if (data_[i] > max) {
|
82
|
+
max = data_[i];
|
83
|
+
argmax = i;
|
84
|
+
}
|
85
|
+
}
|
86
|
+
return argmax;
|
87
|
+
}
|
88
|
+
|
89
|
+
std::ostream& operator<<(std::ostream& os, const Vector& v) {
|
90
|
+
os << std::setprecision(5);
|
91
|
+
for (int64_t j = 0; j < v.size(); j++) {
|
92
|
+
os << v[j] << ' ';
|
93
|
+
}
|
94
|
+
return os;
|
95
|
+
}
|
96
|
+
|
97
|
+
} // namespace fasttext
|
@@ -0,0 +1,61 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#pragma once
|
10
|
+
|
11
|
+
#include <cstdint>
|
12
|
+
#include <ostream>
|
13
|
+
#include <vector>
|
14
|
+
|
15
|
+
#include "real.h"
|
16
|
+
|
17
|
+
namespace fasttext {
|
18
|
+
|
19
|
+
class Matrix;
|
20
|
+
|
21
|
+
class Vector {
|
22
|
+
protected:
|
23
|
+
std::vector<real> data_;
|
24
|
+
|
25
|
+
public:
|
26
|
+
explicit Vector(int64_t);
|
27
|
+
Vector(const Vector&) = default;
|
28
|
+
Vector(Vector&&) noexcept = default;
|
29
|
+
Vector& operator=(const Vector&) = default;
|
30
|
+
Vector& operator=(Vector&&) = default;
|
31
|
+
|
32
|
+
inline real* data() {
|
33
|
+
return data_.data();
|
34
|
+
}
|
35
|
+
inline const real* data() const {
|
36
|
+
return data_.data();
|
37
|
+
}
|
38
|
+
inline real& operator[](int64_t i) {
|
39
|
+
return data_[i];
|
40
|
+
}
|
41
|
+
inline const real& operator[](int64_t i) const {
|
42
|
+
return data_[i];
|
43
|
+
}
|
44
|
+
|
45
|
+
inline int64_t size() const {
|
46
|
+
return data_.size();
|
47
|
+
}
|
48
|
+
void zero();
|
49
|
+
void mul(real);
|
50
|
+
real norm() const;
|
51
|
+
void addVector(const Vector& source);
|
52
|
+
void addVector(const Vector&, real);
|
53
|
+
void addRow(const Matrix&, int64_t);
|
54
|
+
void addRow(const Matrix&, int64_t, real);
|
55
|
+
void mul(const Matrix&, const Vector&);
|
56
|
+
int64_t argmax();
|
57
|
+
};
|
58
|
+
|
59
|
+
std::ostream& operator<<(std::ostream&, const Vector&);
|
60
|
+
|
61
|
+
} // namespace fasttext
|