fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: e0df128ff3b10090bc104614fbdaafe7db5be0077acab3da63c53c442451115a
|
4
|
+
data.tar.gz: ff998e44fcf39523929aff86a17c91153ea284a460388107ac7849c0566d6367
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 56447b44cf08ba76deaec721f66ef16d122ddae9b2ccf732fb6c364b8df283e788331c267447bcb0aab4d012d3bf94861ed30375050b6d024be6e030d0773a0a
|
7
|
+
data.tar.gz: 9c2be8700058fb1c01cf0df61970b48670ac75688c88c9d1ec502d5ee690cc5b328d4b903b52314937b5477b043fb0d83d3d7a1b8cfeda4026d35e4300cb8756
|
data/CHANGELOG.md
ADDED
data/LICENSE.txt
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
Copyright (c) 2019 Andrew Kane
|
2
|
+
|
3
|
+
MIT License
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
"Software"), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
19
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
20
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
21
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
22
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,251 @@
|
|
1
|
+
# fastText
|
2
|
+
|
3
|
+
[fastText](https://fasttext.cc) - efficient text classification and representation learning - for Ruby
|
4
|
+
|
5
|
+
## Installation
|
6
|
+
|
7
|
+
Add this line to your application’s Gemfile:
|
8
|
+
|
9
|
+
```ruby
|
10
|
+
gem 'fasttext'
|
11
|
+
```
|
12
|
+
|
13
|
+
## Text Classification
|
14
|
+
|
15
|
+
Prep your data
|
16
|
+
|
17
|
+
```ruby
|
18
|
+
# documents
|
19
|
+
x = [
|
20
|
+
"text from document one",
|
21
|
+
"text from document two",
|
22
|
+
"text from document three"
|
23
|
+
]
|
24
|
+
|
25
|
+
# labels
|
26
|
+
y = ["ham", "ham", "spam"]
|
27
|
+
```
|
28
|
+
|
29
|
+
> Use an array if a document has multiple labels
|
30
|
+
|
31
|
+
Train a model
|
32
|
+
|
33
|
+
```ruby
|
34
|
+
model = FastText::Classifier.new
|
35
|
+
model.fit(x, y)
|
36
|
+
```
|
37
|
+
|
38
|
+
Get predictions
|
39
|
+
|
40
|
+
```ruby
|
41
|
+
model.predict(x)
|
42
|
+
```
|
43
|
+
|
44
|
+
Save the model to a file
|
45
|
+
|
46
|
+
```ruby
|
47
|
+
model.save_model("model.bin")
|
48
|
+
```
|
49
|
+
|
50
|
+
Load the model from a file
|
51
|
+
|
52
|
+
```ruby
|
53
|
+
model = FastText.load_model("model.bin")
|
54
|
+
```
|
55
|
+
|
56
|
+
Evaluate the model
|
57
|
+
|
58
|
+
```ruby
|
59
|
+
model.test(x_test, y_test)
|
60
|
+
```
|
61
|
+
|
62
|
+
Get words and labels
|
63
|
+
|
64
|
+
```ruby
|
65
|
+
model.words
|
66
|
+
model.labels
|
67
|
+
```
|
68
|
+
|
69
|
+
> Use `include_freq: true` to get their frequency
|
70
|
+
|
71
|
+
Compress the model - significantly reduces size but sacrifices a little performance
|
72
|
+
|
73
|
+
```ruby
|
74
|
+
model.quantize
|
75
|
+
model.save_model("model.ftz")
|
76
|
+
```
|
77
|
+
|
78
|
+
## Word Representations
|
79
|
+
|
80
|
+
Prep your data
|
81
|
+
|
82
|
+
```ruby
|
83
|
+
x = [
|
84
|
+
"text from document one",
|
85
|
+
"text from document two",
|
86
|
+
"text from document three"
|
87
|
+
]
|
88
|
+
```
|
89
|
+
|
90
|
+
Train a model
|
91
|
+
|
92
|
+
```ruby
|
93
|
+
model = FastText::Vectorizer.new
|
94
|
+
model.fit(x)
|
95
|
+
```
|
96
|
+
|
97
|
+
Get nearest neighbors
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
model.nearest_neighbors("asparagus")
|
101
|
+
```
|
102
|
+
|
103
|
+
Get analogies
|
104
|
+
|
105
|
+
```ruby
|
106
|
+
model.analogies("berlin", "germany", "france")
|
107
|
+
```
|
108
|
+
|
109
|
+
Get a word vector
|
110
|
+
|
111
|
+
```ruby
|
112
|
+
model.word_vector("carrot")
|
113
|
+
```
|
114
|
+
|
115
|
+
Get words
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
model.words
|
119
|
+
```
|
120
|
+
|
121
|
+
Save the model to a file
|
122
|
+
|
123
|
+
```ruby
|
124
|
+
model.save_model("model.bin")
|
125
|
+
```
|
126
|
+
|
127
|
+
Load the model from a file
|
128
|
+
|
129
|
+
```ruby
|
130
|
+
model = FastText.load_model("model.bin")
|
131
|
+
```
|
132
|
+
|
133
|
+
Use continuous bag-of-words
|
134
|
+
|
135
|
+
```ruby
|
136
|
+
model = FastText::Vectorizer.new(model: "cbow")
|
137
|
+
```
|
138
|
+
|
139
|
+
## Parameters
|
140
|
+
|
141
|
+
Text classification
|
142
|
+
|
143
|
+
```ruby
|
144
|
+
FastText::Classifier.new(
|
145
|
+
lr: 0.1, # learning rate
|
146
|
+
dim: 100, # size of word vectors
|
147
|
+
ws: 5, # size of the context window
|
148
|
+
epoch: 5, # number of epochs
|
149
|
+
min_count: 1, # minimal number of word occurrences
|
150
|
+
min_count_label: 1, # minimal number of label occurrences
|
151
|
+
minn: 0, # min length of char ngram
|
152
|
+
maxn: 0, # max length of char ngram
|
153
|
+
neg: 5, # number of negatives sampled
|
154
|
+
word_ngrams: 1, # max length of word ngram
|
155
|
+
loss: "softmax", # loss function {ns, hs, softmax, ova}
|
156
|
+
bucket: 2000000, # number of buckets
|
157
|
+
thread: 3, # number of threads
|
158
|
+
lr_update_rate: 100, # change the rate of updates for the learning rate
|
159
|
+
t: 0.0001, # sampling threshold
|
160
|
+
label_prefix: "__label__", # label prefix
|
161
|
+
verbose: 2, # verbose
|
162
|
+
pretrained_vectors: nil # pretrained word vectors (.vec file)
|
163
|
+
)
|
164
|
+
```
|
165
|
+
|
166
|
+
Word representations
|
167
|
+
|
168
|
+
```ruby
|
169
|
+
FastText::Vectorizer.new(
|
170
|
+
model: "skipgram", # unsupervised fasttext model {cbow, skipgram}
|
171
|
+
lr: 0.05, # learning rate
|
172
|
+
dim: 100, # size of word vectors
|
173
|
+
ws: 5, # size of the context window
|
174
|
+
epoch: 5, # number of epochs
|
175
|
+
min_count: 5, # minimal number of word occurrences
|
176
|
+
minn: 3, # min length of char ngram
|
177
|
+
maxn: 6, # max length of char ngram
|
178
|
+
neg: 5, # number of negatives sampled
|
179
|
+
word_ngrams: 1, # max length of word ngram
|
180
|
+
loss: "ns", # loss function {ns, hs, softmax, ova}
|
181
|
+
bucket: 2000000, # number of buckets
|
182
|
+
thread: 3, # number of threads
|
183
|
+
lr_update_rate: 100, # change the rate of updates for the learning rate
|
184
|
+
t: 0.0001, # sampling threshold
|
185
|
+
verbose: 2 # verbose
|
186
|
+
)
|
187
|
+
```
|
188
|
+
|
189
|
+
## Input Files
|
190
|
+
|
191
|
+
Input can be read directly from files
|
192
|
+
|
193
|
+
```ruby
|
194
|
+
model.fit("train.txt")
|
195
|
+
model.test("test.txt")
|
196
|
+
```
|
197
|
+
|
198
|
+
Each line should be a document
|
199
|
+
|
200
|
+
```txt
|
201
|
+
text from document one
|
202
|
+
text from document two
|
203
|
+
text from document three
|
204
|
+
```
|
205
|
+
|
206
|
+
For text classification, lines should start with a list of labels prefixed with `__label__`
|
207
|
+
|
208
|
+
```txt
|
209
|
+
__label__ham text from document one
|
210
|
+
__label__ham text from document two
|
211
|
+
__label__spam text from document three
|
212
|
+
```
|
213
|
+
|
214
|
+
## Pretrained Models
|
215
|
+
|
216
|
+
There are a number of [pretrained models](https://fasttext.cc/docs/en/english-vectors.html) you can download
|
217
|
+
|
218
|
+
### Language Identification
|
219
|
+
|
220
|
+
Download one of the [pretrained models](https://fasttext.cc/docs/en/language-identification.html) and load it
|
221
|
+
|
222
|
+
```ruby
|
223
|
+
model = FastText.load_model("lid.176.ftz")
|
224
|
+
```
|
225
|
+
|
226
|
+
Get language predictions
|
227
|
+
|
228
|
+
```ruby
|
229
|
+
model.predict("bon appétit")
|
230
|
+
```
|
231
|
+
|
232
|
+
## rbenv
|
233
|
+
|
234
|
+
This library uses [Rice](https://github.com/jasonroelofs/rice) to interface with the fastText C++ library. Unfortunately, Rice and rbenv don’t play nicely together. This is actively [being addressed](https://github.com/rbenv/ruby-build/pull/1368), but in the meantime, if you encounter an error during installation, reinstall your Ruby version with the `--enable-shared` flag.
|
235
|
+
|
236
|
+
```sh
|
237
|
+
CONFIGURE_OPTS="--enable-shared" rbenv install 2.6.5
|
238
|
+
```
|
239
|
+
|
240
|
+
## History
|
241
|
+
|
242
|
+
View the [changelog](https://github.com/ankane/fasttext/blob/master/CHANGELOG.md)
|
243
|
+
|
244
|
+
## Contributing
|
245
|
+
|
246
|
+
Everyone is encouraged to help improve this project. Here are a few ways you can help:
|
247
|
+
|
248
|
+
- [Report bugs](https://github.com/ankane/fasttext/issues)
|
249
|
+
- Fix bugs and [submit pull requests](https://github.com/ankane/fasttext/pulls)
|
250
|
+
- Write, clarify, or fix documentation
|
251
|
+
- Suggest or add new features
|
@@ -0,0 +1,291 @@
|
|
1
|
+
#include <args.h>
|
2
|
+
#include <densematrix.h>
|
3
|
+
#include <fasttext.h>
|
4
|
+
#include <rice/Data_Type.hpp>
|
5
|
+
#include <rice/Constructor.hpp>
|
6
|
+
#include <rice/Array.hpp>
|
7
|
+
#include <rice/Hash.hpp>
|
8
|
+
#include <real.h>
|
9
|
+
#include <vector.h>
|
10
|
+
#include <cmath>
|
11
|
+
#include <iterator>
|
12
|
+
#include <sstream>
|
13
|
+
#include <stdexcept>
|
14
|
+
|
15
|
+
using namespace Rice;
|
16
|
+
|
17
|
+
template<>
|
18
|
+
inline
|
19
|
+
long long from_ruby<long long>(Object x)
|
20
|
+
{
|
21
|
+
return NUM2LL(x);
|
22
|
+
}
|
23
|
+
|
24
|
+
template<>
|
25
|
+
inline
|
26
|
+
Object to_ruby<long long>(long long const & x)
|
27
|
+
{
|
28
|
+
return LL2NUM(x);
|
29
|
+
}
|
30
|
+
|
31
|
+
template<>
|
32
|
+
inline
|
33
|
+
unsigned long long from_ruby<unsigned long long>(Object x)
|
34
|
+
{
|
35
|
+
return NUM2ULL(x);
|
36
|
+
}
|
37
|
+
|
38
|
+
template<>
|
39
|
+
inline
|
40
|
+
Object to_ruby<unsigned long long>(unsigned long long const & x)
|
41
|
+
{
|
42
|
+
return ULL2NUM(x);
|
43
|
+
}
|
44
|
+
|
45
|
+
template<>
|
46
|
+
inline
|
47
|
+
Object to_ruby<std::vector<std::pair<fasttext::real, std::string>>>(std::vector<std::pair<fasttext::real, std::string>> const & x)
|
48
|
+
{
|
49
|
+
Array ret;
|
50
|
+
for (const auto& v : x) {
|
51
|
+
Array a;
|
52
|
+
a.push(v.first);
|
53
|
+
a.push(v.second);
|
54
|
+
ret.push(a);
|
55
|
+
}
|
56
|
+
return ret;
|
57
|
+
}
|
58
|
+
|
59
|
+
/// Builds a fasttext::Args from a Ruby options hash.
///
/// Every key is stringified with `to_s` and matched against the supported
/// option names; the value is converted to the type of the matching Args
/// field. Options absent from the hash keep whatever the default-constructed
/// fasttext::Args holds.
///
/// Besides the training options, the quantization options (`cutoff`, `dsub`,
/// `qnorm`, `qout`, `retrain`) are accepted so that callers building args for
/// `quantize` can configure them.
///
/// @param h Ruby hash of option name => value.
/// @return The populated Args.
/// @throws std::invalid_argument on an unknown option name, loss, or model.
fasttext::Args buildArgs(Hash h) {
  fasttext::Args a;

  Hash::iterator it = h.begin();
  Hash::iterator end = h.end();

  for(; it != end; ++it)
  {
    std::string name = from_ruby<std::string>(it->key.to_s());
    Object value = it->value;

    if (name == "input") {
      a.input = from_ruby<std::string>(value);
    } else if (name == "output") {
      a.output = from_ruby<std::string>(value);
    } else if (name == "lr") {
      a.lr = from_ruby<double>(value);
    } else if (name == "lr_update_rate") {
      a.lrUpdateRate = from_ruby<int>(value);
    } else if (name == "dim") {
      a.dim = from_ruby<int>(value);
    } else if (name == "ws") {
      a.ws = from_ruby<int>(value);
    } else if (name == "epoch") {
      a.epoch = from_ruby<int>(value);
    } else if (name == "min_count") {
      a.minCount = from_ruby<int>(value);
    } else if (name == "min_count_label") {
      a.minCountLabel = from_ruby<int>(value);
    } else if (name == "neg") {
      a.neg = from_ruby<int>(value);
    } else if (name == "word_ngrams") {
      a.wordNgrams = from_ruby<int>(value);
    } else if (name == "loss") {
      std::string str = from_ruby<std::string>(value);
      if (str == "softmax") {
        a.loss = fasttext::loss_name::softmax;
      } else if (str == "ns") {
        a.loss = fasttext::loss_name::ns;
      } else if (str == "hs") {
        a.loss = fasttext::loss_name::hs;
      } else if (str == "ova") {
        a.loss = fasttext::loss_name::ova;
      } else {
        throw std::invalid_argument("Unknown loss: " + str);
      }
    } else if (name == "model") {
      std::string str = from_ruby<std::string>(value);
      if (str == "supervised") {
        a.model = fasttext::model_name::sup;
      } else if (str == "skipgram") {
        a.model = fasttext::model_name::sg;
      } else if (str == "cbow") {
        a.model = fasttext::model_name::cbow;
      } else {
        throw std::invalid_argument("Unknown model: " + str);
      }
    } else if (name == "bucket") {
      a.bucket = from_ruby<int>(value);
    } else if (name == "minn") {
      a.minn = from_ruby<int>(value);
    } else if (name == "maxn") {
      a.maxn = from_ruby<int>(value);
    } else if (name == "thread") {
      a.thread = from_ruby<int>(value);
    } else if (name == "t") {
      a.t = from_ruby<double>(value);
    } else if (name == "label_prefix") {
      a.label = from_ruby<std::string>(value);
    } else if (name == "verbose") {
      a.verbose = from_ruby<int>(value);
    } else if (name == "pretrained_vectors") {
      a.pretrainedVectors = from_ruby<std::string>(value);
    } else if (name == "save_output") {
      a.saveOutput = from_ruby<bool>(value);
    // } else if (name == "seed") {
    //   a.seed = from_ruby<int>(value);
    } else if (name == "cutoff") {
      // Quantization options: cast to the field's own type so the conversion
      // follows whatever Args declares.
      a.cutoff = static_cast<decltype(a.cutoff)>(from_ruby<unsigned long long>(value));
    } else if (name == "dsub") {
      a.dsub = static_cast<decltype(a.dsub)>(from_ruby<unsigned long long>(value));
    } else if (name == "qnorm") {
      a.qnorm = from_ruby<bool>(value);
    } else if (name == "qout") {
      a.qout = from_ruby<bool>(value);
    } else if (name == "retrain") {
      a.retrain = from_ruby<bool>(value);
    } else {
      throw std::invalid_argument("Unknown argument: " + name);
    }
  }
  return a;
}
|
143
|
+
|
144
|
+
extern "C"
|
145
|
+
void Init_ext()
|
146
|
+
{
|
147
|
+
Module rb_mFastText = define_module("FastText");
|
148
|
+
Module rb_mExt = define_module_under(rb_mFastText, "Ext");
|
149
|
+
|
150
|
+
define_class_under<fasttext::FastText>(rb_mExt, "Model")
|
151
|
+
.define_constructor(Constructor<fasttext::FastText>())
|
152
|
+
.define_method(
|
153
|
+
"words",
|
154
|
+
*[](fasttext::FastText& m) {
|
155
|
+
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
156
|
+
std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::word);
|
157
|
+
|
158
|
+
Array vocab_list;
|
159
|
+
Array vocab_freq;
|
160
|
+
for (int32_t i = 0; i < d->nwords(); i++) {
|
161
|
+
vocab_list.push(d->getWord(i));
|
162
|
+
vocab_freq.push(freq[i]);
|
163
|
+
}
|
164
|
+
|
165
|
+
Array ret;
|
166
|
+
ret.push(vocab_list);
|
167
|
+
ret.push(vocab_freq);
|
168
|
+
return ret;
|
169
|
+
})
|
170
|
+
.define_method(
|
171
|
+
"labels",
|
172
|
+
*[](fasttext::FastText& m) {
|
173
|
+
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
174
|
+
std::vector<int64_t> freq = d->getCounts(fasttext::entry_type::label);
|
175
|
+
|
176
|
+
Array vocab_list;
|
177
|
+
Array vocab_freq;
|
178
|
+
for (int32_t i = 0; i < d->nlabels(); i++) {
|
179
|
+
vocab_list.push(d->getLabel(i));
|
180
|
+
vocab_freq.push(freq[i]);
|
181
|
+
}
|
182
|
+
|
183
|
+
Array ret;
|
184
|
+
ret.push(vocab_list);
|
185
|
+
ret.push(vocab_freq);
|
186
|
+
return ret;
|
187
|
+
})
|
188
|
+
.define_method(
|
189
|
+
"test",
|
190
|
+
*[](fasttext::FastText& m, const std::string filename, int32_t k) {
|
191
|
+
std::ifstream ifs(filename);
|
192
|
+
if (!ifs.is_open()) {
|
193
|
+
throw std::invalid_argument("Test file cannot be opened!");
|
194
|
+
}
|
195
|
+
fasttext::Meter meter;
|
196
|
+
m.test(ifs, k, 0.0, meter);
|
197
|
+
ifs.close();
|
198
|
+
|
199
|
+
Array ret;
|
200
|
+
ret.push(meter.nexamples());
|
201
|
+
ret.push(meter.precision());
|
202
|
+
ret.push(meter.recall());
|
203
|
+
return ret;
|
204
|
+
})
|
205
|
+
.define_method(
|
206
|
+
"load_model",
|
207
|
+
*[](fasttext::FastText& m, std::string s) { m.loadModel(s); })
|
208
|
+
.define_method(
|
209
|
+
"save_model",
|
210
|
+
*[](fasttext::FastText& m, std::string s) { m.saveModel(s); })
|
211
|
+
.define_method("dimension", &fasttext::FastText::getDimension)
|
212
|
+
.define_method("quantized?", &fasttext::FastText::isQuant)
|
213
|
+
.define_method("word_id", &fasttext::FastText::getWordId)
|
214
|
+
.define_method("subword_id", &fasttext::FastText::getSubwordId)
|
215
|
+
.define_method(
|
216
|
+
"predict",
|
217
|
+
*[](fasttext::FastText& m, const std::string text, int32_t k, float threshold) {
|
218
|
+
std::stringstream ioss(text);
|
219
|
+
std::vector<std::pair<fasttext::real, std::string>> predictions;
|
220
|
+
m.predictLine(ioss, predictions, k, threshold);
|
221
|
+
return predictions;
|
222
|
+
})
|
223
|
+
.define_method(
|
224
|
+
"nearest_neighbors",
|
225
|
+
*[](fasttext::FastText& m, const std::string& word, int32_t k) {
|
226
|
+
return m.getNN(word, k);
|
227
|
+
})
|
228
|
+
.define_method("analogies", &fasttext::FastText::getAnalogies)
|
229
|
+
.define_method("ngram_vectors", &fasttext::FastText::getNgramVectors)
|
230
|
+
.define_method(
|
231
|
+
"word_vector",
|
232
|
+
*[](fasttext::FastText& m, const std::string word) {
|
233
|
+
int dimension = m.getDimension();
|
234
|
+
fasttext::Vector vec = fasttext::Vector(dimension);
|
235
|
+
m.getWordVector(vec, word);
|
236
|
+
float* data = vec.data();
|
237
|
+
Array ret;
|
238
|
+
for (int i = 0; i < dimension; i++) {
|
239
|
+
ret.push(data[i]);
|
240
|
+
}
|
241
|
+
return ret;
|
242
|
+
})
|
243
|
+
.define_method(
|
244
|
+
"subwords",
|
245
|
+
*[](fasttext::FastText& m, const std::string word) {
|
246
|
+
std::vector<std::string> subwords;
|
247
|
+
std::vector<int32_t> ngrams;
|
248
|
+
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
249
|
+
d->getSubwords(word, ngrams, subwords);
|
250
|
+
|
251
|
+
Array ret;
|
252
|
+
for (const auto& subword : subwords) {
|
253
|
+
ret.push(subword);
|
254
|
+
}
|
255
|
+
return ret;
|
256
|
+
})
|
257
|
+
.define_method(
|
258
|
+
"sentence_vector",
|
259
|
+
*[](fasttext::FastText& m, const std::string text) {
|
260
|
+
std::istringstream in(text);
|
261
|
+
int dimension = m.getDimension();
|
262
|
+
fasttext::Vector vec = fasttext::Vector(dimension);
|
263
|
+
m.getSentenceVector(in, vec);
|
264
|
+
float* data = vec.data();
|
265
|
+
Array ret;
|
266
|
+
for (int i = 0; i < dimension; i++) {
|
267
|
+
ret.push(data[i]);
|
268
|
+
}
|
269
|
+
return ret;
|
270
|
+
})
|
271
|
+
.define_method(
|
272
|
+
"train",
|
273
|
+
*[](fasttext::FastText& m, Hash h) {
|
274
|
+
m.train(buildArgs(h));
|
275
|
+
})
|
276
|
+
.define_method(
|
277
|
+
"quantize",
|
278
|
+
*[](fasttext::FastText& m, Hash h) {
|
279
|
+
m.quantize(buildArgs(h));
|
280
|
+
})
|
281
|
+
.define_method(
|
282
|
+
"supervised?",
|
283
|
+
*[](fasttext::FastText& m) {
|
284
|
+
return m.getArgs().model == fasttext::model_name::sup;
|
285
|
+
})
|
286
|
+
.define_method(
|
287
|
+
"label_prefix",
|
288
|
+
*[](fasttext::FastText& m) {
|
289
|
+
return m.getArgs().label;
|
290
|
+
});
|
291
|
+
}
|