fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,15 @@
|
|
1
|
+
require "mkmf-rice"
|
2
|
+
|
3
|
+
abort "Missing stdc++" unless have_library("stdc++")
|
4
|
+
|
5
|
+
# TODO use -std=c++14 when available
|
6
|
+
$CXXFLAGS << " -pthread -std=c++11 -funroll-loops -O3 -march=native"
|
7
|
+
|
8
|
+
src1 = File.expand_path(".", __dir__)
|
9
|
+
src2 = File.expand_path("../../vendor/fastText/src", __dir__)
|
10
|
+
|
11
|
+
$srcs = Dir["{#{src1},#{src2}}/*.{cc,cpp}"]
|
12
|
+
$INCFLAGS << " -I$(srcdir)/../../vendor/fastText/src"
|
13
|
+
$VPATH << "$(srcdir)/../../vendor/fastText/src"
|
14
|
+
|
15
|
+
create_makefile("fasttext/ext")
|
data/lib/fasttext.rb
ADDED
@@ -0,0 +1,41 @@
|
|
1
|
+
# ext
|
2
|
+
require "fasttext/ext"
|
3
|
+
|
4
|
+
# modules
|
5
|
+
require "fasttext/model"
|
6
|
+
require "fasttext/classifier"
|
7
|
+
require "fasttext/vectorizer"
|
8
|
+
require "fasttext/version"
|
9
|
+
|
10
|
+
module FastText
|
11
|
+
class Error < StandardError; end
|
12
|
+
|
13
|
+
class << self
|
14
|
+
def load_model(path)
|
15
|
+
m = Ext::Model.new
|
16
|
+
m.load_model(path)
|
17
|
+
model =
|
18
|
+
if m.supervised?
|
19
|
+
FastText::Classifier.new
|
20
|
+
else
|
21
|
+
FastText::Vectorizer.new
|
22
|
+
end
|
23
|
+
model.instance_variable_set("@m", m)
|
24
|
+
model
|
25
|
+
end
|
26
|
+
|
27
|
+
def train_supervised(**options)
|
28
|
+
input = options.delete(:input)
|
29
|
+
model = FastText::Classifier.new(**options)
|
30
|
+
model.fit(input)
|
31
|
+
model
|
32
|
+
end
|
33
|
+
|
34
|
+
def train_unsupervised(**options)
|
35
|
+
input = options.delete(:input)
|
36
|
+
model = FastText::Vectorizer.new(**options)
|
37
|
+
model.fit(input)
|
38
|
+
model
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
module FastText
|
2
|
+
class Classifier < Model
|
3
|
+
DEFAULT_OPTIONS = {
|
4
|
+
lr: 0.1,
|
5
|
+
lr_update_rate: 100,
|
6
|
+
dim: 100,
|
7
|
+
ws: 5,
|
8
|
+
epoch: 5,
|
9
|
+
min_count: 1,
|
10
|
+
min_count_label: 0,
|
11
|
+
neg: 5,
|
12
|
+
word_ngrams: 1,
|
13
|
+
loss: "softmax",
|
14
|
+
model: "supervised",
|
15
|
+
bucket: 2000000,
|
16
|
+
minn: 0,
|
17
|
+
maxn: 0,
|
18
|
+
thread: 3,
|
19
|
+
t: 0.0001,
|
20
|
+
label_prefix: "__label__",
|
21
|
+
verbose: 2,
|
22
|
+
pretrained_vectors: "",
|
23
|
+
save_output: false,
|
24
|
+
# seed: 0
|
25
|
+
}
|
26
|
+
|
27
|
+
def fit(x, y = nil)
|
28
|
+
input = input_path(x, y)
|
29
|
+
@m ||= Ext::Model.new
|
30
|
+
m.train(DEFAULT_OPTIONS.merge(@options).merge(input: input, model: "supervised"))
|
31
|
+
end
|
32
|
+
|
33
|
+
# TODO support array of text
|
34
|
+
def predict(text, k: 1, threshold: 0.0)
|
35
|
+
m.predict(prep_text(text), k, threshold).map do |v|
|
36
|
+
[remove_prefix(v[1]), v[0]]
|
37
|
+
end.to_h
|
38
|
+
end
|
39
|
+
|
40
|
+
def test(x, y = nil, k: 1)
|
41
|
+
input = input_path(x, y)
|
42
|
+
res = m.test(input, k)
|
43
|
+
{
|
44
|
+
examples: res[0],
|
45
|
+
precision: res[1],
|
46
|
+
recall: res[2]
|
47
|
+
}
|
48
|
+
end
|
49
|
+
|
50
|
+
# TODO support options
|
51
|
+
def quantize
|
52
|
+
m.quantize({})
|
53
|
+
end
|
54
|
+
|
55
|
+
def labels(include_freq: false)
|
56
|
+
labels, freqs = m.labels
|
57
|
+
labels.map! { |v| remove_prefix(v) }
|
58
|
+
if include_freq
|
59
|
+
labels.zip(freqs).to_h
|
60
|
+
else
|
61
|
+
labels
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
private
|
66
|
+
|
67
|
+
def input_path(x, y)
|
68
|
+
if x.is_a?(String)
|
69
|
+
raise ArgumentError, "Cannot pass y with file" if y
|
70
|
+
x
|
71
|
+
else
|
72
|
+
tempfile = Tempfile.new("fasttext")
|
73
|
+
x.zip(y) do |xi, yi|
|
74
|
+
parts = Array(yi).map { |label| "__label__" + label }
|
75
|
+
parts << xi.gsub("\n", " ") # replace newlines in document
|
76
|
+
tempfile.write(parts.join(" "))
|
77
|
+
tempfile.write("\n")
|
78
|
+
end
|
79
|
+
tempfile.close
|
80
|
+
tempfile.path
|
81
|
+
end
|
82
|
+
end
|
83
|
+
|
84
|
+
def remove_prefix(label)
|
85
|
+
label.sub(label_prefix, "")
|
86
|
+
end
|
87
|
+
|
88
|
+
def label_prefix
|
89
|
+
m.label_prefix
|
90
|
+
end
|
91
|
+
end
|
92
|
+
end
|
Binary file
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module FastText
|
2
|
+
class Model
|
3
|
+
def initialize(**options)
|
4
|
+
@options = options
|
5
|
+
end
|
6
|
+
|
7
|
+
def dimension
|
8
|
+
m.dimension
|
9
|
+
end
|
10
|
+
|
11
|
+
def quantized?
|
12
|
+
m.quantized?
|
13
|
+
end
|
14
|
+
|
15
|
+
def save_model(path)
|
16
|
+
m.save_model(path)
|
17
|
+
end
|
18
|
+
|
19
|
+
def words(include_freq: false)
|
20
|
+
words, freqs = m.words
|
21
|
+
if include_freq
|
22
|
+
words.zip(freqs).to_h
|
23
|
+
else
|
24
|
+
words
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
def word_vector(word)
|
29
|
+
m.word_vector(word)
|
30
|
+
end
|
31
|
+
|
32
|
+
def sentence_vector(text)
|
33
|
+
m.sentence_vector(prep_text(text))
|
34
|
+
end
|
35
|
+
|
36
|
+
def word_id(word)
|
37
|
+
m.word_id(word)
|
38
|
+
end
|
39
|
+
|
40
|
+
def subword_id(subword)
|
41
|
+
m.subword_id(subword)
|
42
|
+
end
|
43
|
+
|
44
|
+
def subwords(word)
|
45
|
+
m.subwords(word)
|
46
|
+
end
|
47
|
+
|
48
|
+
private
|
49
|
+
|
50
|
+
# text must end in newline for prediction to match Python and CLI
|
51
|
+
def prep_text(text)
|
52
|
+
text = "#{text}\n" unless text.end_with?("\n")
|
53
|
+
text
|
54
|
+
end
|
55
|
+
|
56
|
+
def m
|
57
|
+
@m || (raise Error, "Not fit")
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module FastText
|
2
|
+
class Vectorizer < Model
|
3
|
+
DEFAULT_OPTIONS = {
|
4
|
+
lr: 0.5,
|
5
|
+
lr_update_rate: 100,
|
6
|
+
dim: 100,
|
7
|
+
ws: 5,
|
8
|
+
epoch: 5,
|
9
|
+
min_count: 1,
|
10
|
+
min_count_label: 0,
|
11
|
+
neg: 5,
|
12
|
+
word_ngrams: 1,
|
13
|
+
loss: "ns",
|
14
|
+
model: "skipgram",
|
15
|
+
bucket: 2000000,
|
16
|
+
minn: 3,
|
17
|
+
maxn: 6,
|
18
|
+
thread: 3,
|
19
|
+
t: 0.0001,
|
20
|
+
verbose: 2,
|
21
|
+
pretrained_vectors: "",
|
22
|
+
save_output: false,
|
23
|
+
# seed: 0
|
24
|
+
}
|
25
|
+
|
26
|
+
def fit(x)
|
27
|
+
input = input_path(x)
|
28
|
+
@m ||= Ext::Model.new
|
29
|
+
m.train(DEFAULT_OPTIONS.merge(@options).merge(input: input))
|
30
|
+
end
|
31
|
+
|
32
|
+
def nearest_neighbors(word, k: 10)
|
33
|
+
m.nearest_neighbors(word, k).map(&:reverse).to_h
|
34
|
+
end
|
35
|
+
|
36
|
+
def analogies(word_a, word_b, word_c, k: 10)
|
37
|
+
m.analogies(k, word_a, word_b, word_c).map(&:reverse).to_h
|
38
|
+
end
|
39
|
+
|
40
|
+
private
|
41
|
+
|
42
|
+
# separate example by newlines
|
43
|
+
# https://github.com/facebookresearch/fastText/issues/518
|
44
|
+
def input_path(x)
|
45
|
+
if x.is_a?(String)
|
46
|
+
x
|
47
|
+
else
|
48
|
+
tempfile = Tempfile.new("fasttext")
|
49
|
+
x.each do |xi|
|
50
|
+
tempfile.write(xi)
|
51
|
+
tempfile.write("\n")
|
52
|
+
end
|
53
|
+
tempfile.close
|
54
|
+
tempfile.path
|
55
|
+
end
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This source code is licensed under the MIT license found in the
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
7
|
+
#
|
8
|
+
|
9
|
+
cmake_minimum_required(VERSION 2.8.9)
|
10
|
+
project(fasttext)
|
11
|
+
|
12
|
+
# The version number.
|
13
|
+
set (fasttext_VERSION_MAJOR 0)
|
14
|
+
set (fasttext_VERSION_MINOR 1)
|
15
|
+
|
16
|
+
include_directories(fasttext)
|
17
|
+
|
18
|
+
set(CMAKE_CXX_FLAGS " -pthread -std=c++11 -funroll-loops -O3 -march=native")
|
19
|
+
|
20
|
+
set(HEADER_FILES
|
21
|
+
src/args.h
|
22
|
+
src/densematrix.h
|
23
|
+
src/dictionary.h
|
24
|
+
src/fasttext.h
|
25
|
+
src/loss.h
|
26
|
+
src/matrix.h
|
27
|
+
src/meter.h
|
28
|
+
src/model.h
|
29
|
+
src/productquantizer.h
|
30
|
+
src/quantmatrix.h
|
31
|
+
src/real.h
|
32
|
+
src/utils.h
|
33
|
+
src/vector.h)
|
34
|
+
|
35
|
+
set(SOURCE_FILES
|
36
|
+
src/args.cc
|
37
|
+
src/densematrix.cc
|
38
|
+
src/dictionary.cc
|
39
|
+
src/fasttext.cc
|
40
|
+
src/loss.cc
|
41
|
+
src/main.cc
|
42
|
+
src/matrix.cc
|
43
|
+
src/meter.cc
|
44
|
+
src/model.cc
|
45
|
+
src/productquantizer.cc
|
46
|
+
src/quantmatrix.cc
|
47
|
+
src/utils.cc
|
48
|
+
src/vector.cc)
|
49
|
+
|
50
|
+
add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
|
51
|
+
add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
|
52
|
+
add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
|
53
|
+
set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
|
54
|
+
set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
|
55
|
+
set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
|
56
|
+
POSITION_INDEPENDENT_CODE True)
|
57
|
+
add_executable(fasttext-bin src/main.cc)
|
58
|
+
target_link_libraries(fasttext-bin pthread fasttext-static)
|
59
|
+
set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
|
60
|
+
install (TARGETS fasttext-shared
|
61
|
+
LIBRARY DESTINATION lib)
|
62
|
+
install (TARGETS fasttext-static
|
63
|
+
ARCHIVE DESTINATION lib)
|
64
|
+
install (TARGETS fasttext-static_pic
|
65
|
+
ARCHIVE DESTINATION lib)
|
66
|
+
install (TARGETS fasttext-bin
|
67
|
+
RUNTIME DESTINATION bin
|
68
|
+
PUBLIC_HEADER DESTINATION include/fasttext)
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# Contributing to fastText
|
2
|
+
We want to make contributing to this project as easy and transparent as possible.
|
3
|
+
|
4
|
+
## Issues
|
5
|
+
We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue.
|
6
|
+
|
7
|
+
### Reproducing issues
|
8
|
+
Please make sure that the issue you mention is not a result of one of the existing third-party libraries. For example, please do not post an issue if you encountered an error within a third-party Python library. We can only help you with errors which can be directly reproduced either with our C++ code or the corresponding Python bindings. If you do find an error, please post detailed steps to reproduce it. If we can't reproduce your error, we can't help you fix it.
|
9
|
+
|
10
|
+
## Pull Requests
|
11
|
+
Please post an Issue before submitting a pull request. This might save you some time as it is possible we can't support your contribution, albeit we try our best to accomodate your (planned) work and highly appreciate your time. Generally, it is best to have a pull request emerge from an issue rather than the other way around.
|
12
|
+
|
13
|
+
To create a pull request:
|
14
|
+
|
15
|
+
1. Fork the repo and create your branch from `master`.
|
16
|
+
2. If you've added code that should be tested, add tests.
|
17
|
+
3. If you've changed APIs, update the documentation.
|
18
|
+
4. Ensure the test suite passes.
|
19
|
+
5. Make sure your code lints.
|
20
|
+
6. If you haven't already, complete the Contributor License Agreement ("CLA").
|
21
|
+
|
22
|
+
## Tests
|
23
|
+
First, you will need to make sure you have the required data. For that, please have a look at the fetch_test_data.sh script under tests. Next run the tests using the runtests.py script passing a path to the directory containing the datasets.
|
24
|
+
|
25
|
+
## Contributor License Agreement ("CLA")
|
26
|
+
In order to accept your pull request, we need you to submit a CLA. You only need
|
27
|
+
to do this once to work on any of Facebook's open source projects.
|
28
|
+
|
29
|
+
Complete your CLA here: <https://code.facebook.com/cla>
|
30
|
+
|
31
|
+
## License
|
32
|
+
By contributing to fastText, you agree that your contributions will be licensed under its MIT license.
|
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2016-present, Facebook, Inc.
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
@@ -0,0 +1,63 @@
|
|
1
|
+
#
|
2
|
+
# Copyright (c) 2016-present, Facebook, Inc.
|
3
|
+
# All rights reserved.
|
4
|
+
#
|
5
|
+
# This source code is licensed under the MIT license found in the
|
6
|
+
# LICENSE file in the root directory of this source tree.
|
7
|
+
#
|
8
|
+
|
9
|
+
CXX = c++
|
10
|
+
CXXFLAGS = -pthread -std=c++0x -march=native
|
11
|
+
OBJS = args.o matrix.o dictionary.o loss.o productquantizer.o densematrix.o quantmatrix.o vector.o model.o utils.o meter.o fasttext.o
|
12
|
+
INCLUDES = -I.
|
13
|
+
|
14
|
+
opt: CXXFLAGS += -O3 -funroll-loops -DNDEBUG
|
15
|
+
opt: fasttext
|
16
|
+
|
17
|
+
coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage
|
18
|
+
coverage: fasttext
|
19
|
+
|
20
|
+
debug: CXXFLAGS += -g -O0 -fno-inline
|
21
|
+
debug: fasttext
|
22
|
+
|
23
|
+
args.o: src/args.cc src/args.h
|
24
|
+
$(CXX) $(CXXFLAGS) -c src/args.cc
|
25
|
+
|
26
|
+
matrix.o: src/matrix.cc src/matrix.h
|
27
|
+
$(CXX) $(CXXFLAGS) -c src/matrix.cc
|
28
|
+
|
29
|
+
dictionary.o: src/dictionary.cc src/dictionary.h src/args.h
|
30
|
+
$(CXX) $(CXXFLAGS) -c src/dictionary.cc
|
31
|
+
|
32
|
+
loss.o: src/loss.cc src/loss.h src/matrix.h src/real.h
|
33
|
+
$(CXX) $(CXXFLAGS) -c src/loss.cc
|
34
|
+
|
35
|
+
productquantizer.o: src/productquantizer.cc src/productquantizer.h src/utils.h
|
36
|
+
$(CXX) $(CXXFLAGS) -c src/productquantizer.cc
|
37
|
+
|
38
|
+
densematrix.o: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
|
39
|
+
$(CXX) $(CXXFLAGS) -c src/densematrix.cc
|
40
|
+
|
41
|
+
quantmatrix.o: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
|
42
|
+
$(CXX) $(CXXFLAGS) -c src/quantmatrix.cc
|
43
|
+
|
44
|
+
vector.o: src/vector.cc src/vector.h src/utils.h
|
45
|
+
$(CXX) $(CXXFLAGS) -c src/vector.cc
|
46
|
+
|
47
|
+
model.o: src/model.cc src/model.h src/args.h
|
48
|
+
$(CXX) $(CXXFLAGS) -c src/model.cc
|
49
|
+
|
50
|
+
utils.o: src/utils.cc src/utils.h
|
51
|
+
$(CXX) $(CXXFLAGS) -c src/utils.cc
|
52
|
+
|
53
|
+
meter.o: src/meter.cc src/meter.h
|
54
|
+
$(CXX) $(CXXFLAGS) -c src/meter.cc
|
55
|
+
|
56
|
+
fasttext.o: src/fasttext.cc src/*.h
|
57
|
+
$(CXX) $(CXXFLAGS) -c src/fasttext.cc
|
58
|
+
|
59
|
+
fasttext: $(OBJS) src/fasttext.cc
|
60
|
+
$(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
|
61
|
+
|
62
|
+
clean:
|
63
|
+
rm -rf *.o *.gcno *.gcda fasttext
|