fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,49 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from __future__ import absolute_import
|
8
|
+
from __future__ import division
|
9
|
+
from __future__ import print_function
|
10
|
+
from __future__ import unicode_literals
|
11
|
+
|
12
|
+
from fasttext import load_model
|
13
|
+
from fasttext import tokenize
|
14
|
+
import sys
|
15
|
+
import time
|
16
|
+
import tempfile
|
17
|
+
import argparse
|
18
|
+
|
19
|
+
|
20
|
+
def get_word_vector(data, model):
    """Time tokenizing a text file and looking up a vector for each token.

    Args:
        data: Path of the text file; it is read in full and passed to
            fasttext's ``tokenize``.
        model: Path of the model file handed to ``load_model``.

    Timings and the token count are printed to stdout. Every 10000 tokens
    the fraction of tokens processed so far is written to stderr.
    """
    read_start = time.time()
    print("Reading")
    with open(data, 'r') as handle:
        tokens = tokenize(handle.read())
    read_end = time.time()
    print("Read TIME: " + str(read_end - read_start))
    print("Read NUM : " + str(len(tokens)))
    ft_model = load_model(model)
    # This is not equivalent to piping the data into
    # print-word-vector, because the data is tokenized
    # first.
    lookup_start = time.time()
    for count, token in enumerate(tokens, 1):
        ft_model.get_word_vector(token)
        if count % 10000 != 0:
            continue
        # "\r" rewrites the same terminal line with the updated fraction.
        sys.stderr.write("\ri: " + str(float(count / len(tokens))))
        sys.stderr.flush()
    lookup_end = time.time()
    print("\nVectoring: " + str(lookup_end - lookup_start))
|
42
|
+
|
43
|
+
|
44
|
+
if __name__ == "__main__":
    # Command line: two positional paths, the model first and the data second.
    cli = argparse.ArgumentParser(
        description='Simple benchmark for get_word_vector.'
    )
    for arg_name, arg_help in (
        ('model', 'A model file to use for benchmarking.'),
        ('data', 'A data file to use for benchmarking.'),
    ):
        cli.add_argument(arg_name, help=arg_help)
    options = cli.parse_args()
    # get_word_vector takes (data, model), the reverse of the CLI order.
    get_word_vector(options.data, options.model)
|
@@ -0,0 +1,81 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
# NOTE: This requires PyTorch! We do not provide installation scripts to install PyTorch.
|
10
|
+
# It is up to you to install this dependency if you want to execute this example.
|
11
|
+
# PyTorch's website should give you clear instructions on this: http://pytorch.org/
|
12
|
+
|
13
|
+
from __future__ import absolute_import
|
14
|
+
from __future__ import division
|
15
|
+
from __future__ import print_function
|
16
|
+
from __future__ import unicode_literals
|
17
|
+
from torch.nn.modules.sparse import EmbeddingBag
|
18
|
+
import numpy as np
|
19
|
+
import torch
|
20
|
+
import random
|
21
|
+
import string
|
22
|
+
import time
|
23
|
+
from fasttext import load_model
|
24
|
+
from torch.autograd import Variable
|
25
|
+
|
26
|
+
|
27
|
+
class FastTextEmbeddingBag(EmbeddingBag):
    """EmbeddingBag whose weights are copied from a fastText model.

    The fastText input matrix becomes the embedding weight, and ``forward``
    turns each word into its fastText subword indices so that one output row
    is produced per word.
    """

    def __init__(self, model_path):
        """Load the fastText model at `model_path` and copy its input matrix.

        Args:
            model_path: Path passed to fasttext's ``load_model``.
        """
        self.model = load_model(model_path)
        input_matrix = self.model.get_input_matrix()
        input_matrix_shape = input_matrix.shape
        # Size the bag as (rows, columns) of the fastText input matrix.
        super().__init__(input_matrix_shape[0], input_matrix_shape[1])
        self.weight.data.copy_(torch.FloatTensor(input_matrix))

    def forward(self, words):
        """Embed every word in `words` through its subword indices.

        Args:
            words: Iterable of words; each is passed to
                ``self.model.get_subwords``.

        Returns:
            The result of ``EmbeddingBag.forward`` on the flattened subword
            indices, with one bag (offset) per word.
        """
        # Seed with an empty int64 chunk so an empty `words` still yields a
        # valid (empty) index array. Chunks are concatenated once at the end
        # instead of once per word, which was quadratic in the total length.
        chunks = [np.empty([0], dtype=np.int64)]
        word_offsets = []
        position = 0
        for word in words:
            _, subinds = self.model.get_subwords(word)
            # Each bag starts where the previous word's indices ended.
            word_offsets.append(position)
            chunks.append(subinds)
            position += len(subinds)
        word_subinds = np.concatenate(chunks)
        ind = Variable(torch.LongTensor(word_subinds))
        offsets = Variable(torch.LongTensor(word_offsets))
        return super().forward(ind, offsets)
|
46
|
+
|
47
|
+
|
48
|
+
def random_word(N):
    """Return a random string of N characters drawn from ASCII letters and digits."""
    alphabet = string.ascii_uppercase + string.ascii_lowercase + string.digits
    picked = random.choices(alphabet, k=N)
    return ''.join(picked)
|
55
|
+
|
56
|
+
|
57
|
+
if __name__ == "__main__":
    # Benchmark FastTextEmbeddingBag on random words and check every output
    # row against the vector fastText itself returns for the same word.
    ft_emb = FastTextEmbeddingBag("fil9.bin")
    model = load_model("fil9.bin")
    num_lines = 200
    total_seconds = 0.0
    total_words = 0
    for _ in range(num_lines):
        words = [
            random_word(random.randint(1, 10))
            for _ in range(random.randint(15, 25))
        ]
        total_words += len(words)
        # time.clock() was removed in Python 3.8; perf_counter() is the
        # documented replacement for timing short intervals.
        start = time.perf_counter()
        words_emb = ft_emb(words)
        total_seconds += (time.perf_counter() - start)
        # Iterating the 2-D result yields one embedding row per input word.
        for word, row in zip(words, words_emb):
            ft_word_emb = model.get_word_vector(word)
            py_emb = np.array(row.data)
            assert (np.isclose(ft_word_emb, py_emb).all())
    # NOTE(review): the label says "Avg." but the value printed is the total
    # number of seconds across all lines -- confirm which one is intended.
    print(
        "Avg. {:2.5f} seconds to build embeddings for {} lines with a total of {} words.".
        format(total_seconds, num_lines, total_words)
    )
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
from __future__ import absolute_import
|
10
|
+
from __future__ import division
|
11
|
+
from __future__ import print_function
|
12
|
+
from __future__ import unicode_literals
|
13
|
+
from __future__ import division, absolute_import, print_function
|
14
|
+
|
15
|
+
from fasttext import load_model
|
16
|
+
import argparse
|
17
|
+
import errno
|
18
|
+
|
19
|
+
if __name__ == "__main__":
    # Dump a fastText .bin model as .vec text: a "<count> <dimension>" header
    # followed by one "<word> <v1> <v2> ..." line per vocabulary word.
    parser = argparse.ArgumentParser(
        description=("Print fasttext .vec file to stdout from .bin file")
    )
    parser.add_argument(
        "model",
        help="Model to use",
    )
    args = parser.parse_args()

    f = load_model(args.model)
    words = f.get_words()
    print(str(len(words)) + " " + str(f.get_dimension()))
    for w in words:
        v = f.get_word_vector(w)
        # Build the vector text in one pass instead of repeated "+=".
        vstr = "".join(" " + str(vi) for vi in v)
        try:
            print(w + vstr)
        except IOError as e:
            # Only a closed pipe (e.g. output piped into `head`) is expected;
            # any other I/O failure must not be hidden.
            if e.errno != errno.EPIPE:
                raise
            # Nothing more can be written once the reader has gone away.
            break
|
@@ -0,0 +1,163 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
from __future__ import absolute_import
|
10
|
+
from __future__ import division
|
11
|
+
from __future__ import print_function
|
12
|
+
from __future__ import unicode_literals
|
13
|
+
from __future__ import division, absolute_import, print_function
|
14
|
+
|
15
|
+
from fasttext import load_model
|
16
|
+
from fasttext import util
|
17
|
+
import argparse
|
18
|
+
import numpy as np
|
19
|
+
|
20
|
+
|
21
|
+
def process_question(question, cossims, model, words, vectors):
    """Score one section of analogy questions.

    Args:
        question: Iterable of text lines, each expected to hold four
            whitespace-separated words ``a b c d``.
        cossims: Preallocated buffer forwarded to
            ``util.find_nearest_neighbor`` as ``cossims``.
        model: Object providing ``get_word_vector(word)``.
        words: Sequence of vocabulary words; positions are used as the
            indices passed to and returned by ``util.find_nearest_neighbor``.
        vectors: Word vectors forwarded to ``util.find_nearest_neighbor``.

    Returns:
        Tuple ``(correct, num_qs, num_lines)``: questions answered correctly,
        questions actually evaluated, and lines seen. Lines with fewer than
        four words, or with any word missing from `words`, are counted in
        ``num_lines`` only.
    """
    # Map each word to its first position once, so the membership tests and
    # index lookups below are O(1) instead of a list scan per word.
    word_index = {}
    for position, word in enumerate(words):
        word_index.setdefault(word, position)

    correct = 0
    num_qs = 0
    num_lines = 0
    for line in question:
        num_lines += 1
        # We lowercase all words to correspond to the preprocessing
        # we applied to our data.
        qwords = [x.lower().strip() for x in line.split()]
        # A question needs three query words plus the expected answer;
        # blank or truncated lines are skipped rather than raising IndexError.
        if len(qwords) < 4:
            continue
        # If one of the words is not in the vocabulary we skip this question
        if any(w not in word_index for w in qwords):
            continue
        # The first three words form the query
        # We retrieve their word vectors and normalize them
        query = [model.get_word_vector(x) for x in qwords[:3]]
        query = [x / np.linalg.norm(x) for x in query]
        # Get the query vector. Example:
        # Germany - Berlin + France
        query = query[1] - query[0] + query[2]
        # We don't need to rank all the words, only until we found
        # the first word not equal to our set of query words.
        ban_set = [word_index[x] for x in qwords[:3]]
        nearest = util.find_nearest_neighbor(
            query, vectors, ban_set, cossims=cossims
        )
        if words[nearest] == qwords[3]:
            correct += 1
        num_qs += 1
    return correct, num_qs, num_lines
|
56
|
+
|
57
|
+
|
58
|
+
# We use the same conventions as within compute-accuracy
|
59
|
+
def print_compute_accuracy_score(
    question, correct, num_qs, total_accuracy, semantic_accuracy,
    syntactic_accuracy
):
    """Print one result line for a question section.

    Args:
        question: Section name, right-aligned in a 30-character field.
        correct: Number of correctly answered questions in the section.
        num_qs: Number of evaluated questions in the section.
        total_accuracy: Running overall accuracy as a fraction.
        semantic_accuracy: Running semantic accuracy as a fraction.
        syntactic_accuracy: Running syntactic accuracy as a fraction.

    The three running accuracies are multiplied by 100 for display. The
    section's own top-1 accuracy is shown as 0 when `num_qs` is not positive.
    """
    if num_qs > 0:
        top1_percent = correct / float(num_qs) * 100
    else:
        top1_percent = 0
    template = "{0:>30}: ACCURACY TOP1: {3:.2f} % ({1} / {2})\t Total accuracy: {4:.2f} % Semantic accuracy: {5:.2f} % Syntactic accuracy: {6:.2f} %"
    fields = (
        question,
        correct,
        num_qs,
        top1_percent,
        total_accuracy * 100,
        semantic_accuracy * 100,
        syntactic_accuracy * 100,
    )
    print(template.format(*fields))
|
76
|
+
|
77
|
+
|
78
|
+
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description=(
            "compute_accuracy equivalent in Python. "
            "See https://github.com/tmikolov/word2vec/blob/master/demo-word-accuracy.sh"
        )
    )
    parser.add_argument(
        "model",
        help="Model to use",
    )
    parser.add_argument(
        "question_words",
        help="word questions similar to tmikolov's file (see help for link)",
    )
    parser.add_argument(
        "threshold",
        # Parsed by argparse so a non-integer value gives a usage error
        # instead of a traceback.
        type=int,
        help="threshold used to limit number of words used",
    )
    args = parser.parse_args()

    # Retrieve list of normalized word vectors for the first words up
    # until the threshold count.
    f = load_model(args.model)
    # Gets words with associated frequency sorted by default by descending order
    words, freq = f.get_words(include_freq=True)
    words = words[:args.threshold]
    vectors = np.zeros((len(words), f.get_dimension()), dtype=float)
    for i, word in enumerate(words):
        wv = f.get_word_vector(word)
        wv = wv / np.linalg.norm(wv)
        vectors[i] = wv

    total_correct = 0
    total_qs = 0
    total_num_lines = 0

    total_se_correct = 0
    total_se_qs = 0

    total_sy_correct = 0
    total_sy_qs = 0

    # Sections start with ": name"; the text before the first ':' is dropped.
    with open(args.question_words, 'r') as fqw:
        questions = fqw.read().split(':')[1:]
    # For efficiency preallocate the memory to calculate cosine similarities
    cossims = np.zeros(len(words), dtype=float)
    for qid, question in enumerate(questions):
        quads = question.split('\n')
        question = quads[0].strip()
        # Keep every non-blank line after the section header. Slicing with
        # [1:-1] lost the final question when the file had no trailing
        # newline.
        quads = [quad for quad in quads[1:] if quad.strip()]
        correct, num_qs, num_lines = process_question(
            quads, cossims, f, words, vectors
        )
        total_qs += num_qs
        total_correct += correct
        total_num_lines += num_lines

        # The first five sections are counted as semantic, the rest as
        # syntactic.
        if (qid < 5):
            total_se_correct += correct
            total_se_qs += num_qs
        else:
            total_sy_correct += correct
            total_sy_qs += num_qs

        print_compute_accuracy_score(
            question,
            correct,
            num_qs,
            total_correct / float(total_qs) if total_qs > 0 else 0,
            total_se_correct / float(total_se_qs) if total_se_qs > 0 else 0,
            total_sy_correct / float(total_sy_qs) if total_sy_qs > 0 else 0,
        )

    print(
        "Questions seen / total: {0} {1} {2:.2f} %".
        format(
            total_qs,
            total_num_lines,
            total_qs / total_num_lines * 100 if total_num_lines > 0 else 0,
        )
    )
|
@@ -0,0 +1,48 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
from __future__ import absolute_import
|
10
|
+
from __future__ import division
|
11
|
+
from __future__ import print_function
|
12
|
+
from __future__ import unicode_literals
|
13
|
+
from __future__ import division, absolute_import, print_function
|
14
|
+
|
15
|
+
from fasttext import load_model
|
16
|
+
import argparse
|
17
|
+
import errno
|
18
|
+
|
19
|
+
if __name__ == "__main__":
    # Print "<word-or-label>\t<frequency>" for every dictionary entry.
    parser = argparse.ArgumentParser(
        description=(
            "Print words or labels and frequency of a model's dictionary"
        )
    )
    parser.add_argument(
        "model",
        help="Model to use",
    )
    parser.add_argument(
        "-l",
        "--labels",
        help="Print labels instead of words",
        action='store_true',
        default=False,
    )
    args = parser.parse_args()

    f = load_model(args.model)
    if args.labels:
        words, freq = f.get_labels(include_freq=True)
    else:
        words, freq = f.get_words(include_freq=True)
    # The loop variable is named `count` so it does not rebind the model `f`.
    for w, count in zip(words, freq):
        try:
            print(w + "\t" + str(count))
        except IOError as e:
            # Only a closed pipe (e.g. output piped into `head`) is expected;
            # any other I/O failure must not be hidden.
            if e.errno != errno.EPIPE:
                raise
            # Nothing more can be written once the reader has gone away.
            break
|
@@ -0,0 +1,42 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
from __future__ import absolute_import
|
10
|
+
from __future__ import division
|
11
|
+
from __future__ import print_function
|
12
|
+
from __future__ import unicode_literals
|
13
|
+
|
14
|
+
import os
|
15
|
+
from fasttext import train_supervised
|
16
|
+
|
17
|
+
|
18
|
+
def print_results(N, p, r):
    """Print the sample count, precision@1 and recall@1, one per line.

    Args:
        N: Value printed after ``N``.
        p: Precision, formatted with three decimals.
        r: Recall, formatted with three decimals.
    """
    k = 1
    print("N\t" + str(N))
    precision_row = "P@{}\t{:.3f}".format(k, p)
    print(precision_row)
    recall_row = "R@{}\t{:.3f}".format(k, r)
    print(recall_row)
|
22
|
+
|
23
|
+
if __name__ == "__main__":
    # Data files are looked up under $DATADIR (current directory when unset).
    data_dir = os.getenv("DATADIR", '')
    train_data = os.path.join(data_dir, 'cooking.train')
    valid_data = os.path.join(data_dir, 'cooking.valid')

    # train_supervised uses the same arguments and defaults as the fastText cli
    shared_args = dict(
        input=train_data, epoch=25, lr=1.0, wordNgrams=2, verbose=2, minCount=1
    )

    # First run: the shared settings with the default loss.
    model = train_supervised(**shared_args)
    print_results(*model.test(valid_data))

    # Second run: same settings with loss="hs"; this model is saved.
    model = train_supervised(loss="hs", **shared_args)
    print_results(*model.test(valid_data))
    model.save_model("cooking.bin")

    # Quantize the second model, evaluate it again and save it.
    model.quantize(input=train_data, qnorm=True, retrain=True, cutoff=100000)
    print_results(*model.test(valid_data))
    model.save_model("cooking.ftz")
|
@@ -0,0 +1,56 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
|
9
|
+
from __future__ import absolute_import
|
10
|
+
from __future__ import division
|
11
|
+
from __future__ import print_function
|
12
|
+
from __future__ import unicode_literals
|
13
|
+
from __future__ import division, absolute_import, print_function
|
14
|
+
|
15
|
+
from fasttext import train_unsupervised
|
16
|
+
import numpy as np
|
17
|
+
import os
|
18
|
+
from scipy import stats
|
19
|
+
|
20
|
+
|
21
|
+
# Because of fasttext we don't need to account for OOV
|
22
|
+
def compute_similarity(data_path, model=None):
    """Correlate model cosine similarities with gold scores from a file.

    Args:
        data_path: Path of a whitespace-separated file whose lines are
            ``word1 word2 score``. The file is read in binary mode, so the
            words reach the model as lowercased ``bytes``.
        model: Object providing ``get_word_vector(word)``. When omitted, the
            module-level ``model`` is used, which is how the script's entry
            point calls this function.

    Returns:
        Tuple ``(dataset, correlation, 0)``: the base name of `data_path`,
        the Spearman correlation multiplied by 100, and a constant 0 in the
        out-of-vocabulary slot.

    Raises:
        NameError: If no `model` is passed and no module-level ``model``
            exists.
    """
    if model is None:
        # Backward compatible with callers that rely on the global set under
        # ``if __name__ == "__main__"``.
        model = globals().get("model")
        if model is None:
            raise NameError(
                "compute_similarity needs a model: pass model=... or "
                "define a module-level 'model'"
            )

    def similarity(v1, v2):
        # Cosine similarity of the two vectors.
        n1 = np.linalg.norm(v1)
        n2 = np.linalg.norm(v2)
        return np.dot(v1, v2) / n1 / n2

    mysim = []
    gold = []

    with open(data_path, 'rb') as fin:
        for line in fin:
            tline = line.split()
            # Blank lines carry no word pair; skip them instead of failing
            # on tline[0].
            if not tline:
                continue
            word1 = tline[0].lower()
            word2 = tline[1].lower()

            v1 = model.get_word_vector(word1)
            v2 = model.get_word_vector(word2)
            d = similarity(v1, v2)
            mysim.append(d)
            gold.append(float(tline[2]))

    corr = stats.spearmanr(mysim, gold)
    dataset = os.path.basename(data_path)
    correlation = corr[0] * 100
    return dataset, correlation, 0
|
47
|
+
|
48
|
+
|
49
|
+
if __name__ == "__main__":
    # The training corpus is looked up under $DATADIR (current directory
    # when unset).
    corpus_path = os.path.join(os.getenv("DATADIR", ''), 'fil9')
    # The name `model` must stay module-level: compute_similarity is called
    # below without a model argument.
    model = train_unsupervised(input=corpus_path, model='skipgram')
    model.save_model("fil9.bin")
    dataset, corr, oov = compute_similarity('rw.txt')
    report = "{0:20s}: {1:2.0f} (OOV: {2:2.0f}%)"
    print(report.format(dataset, corr, 0))
|