fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,13 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from __future__ import absolute_import
|
8
|
+
from __future__ import division
|
9
|
+
from __future__ import print_function
|
10
|
+
from __future__ import unicode_literals
|
11
|
+
|
12
|
+
from .util import test
|
13
|
+
from .util import find_nearest_neighbor
|
@@ -0,0 +1,60 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
# NOTE: The purpose of this file is not to accumulate all useful utility
|
8
|
+
# functions. This file should contain very commonly used and requested functions
|
9
|
+
# (such as test). If you think you have a function at that level, please create
|
10
|
+
# an issue and we will happily review your suggestion. This file is also not supposed
|
11
|
+
# to pull in dependencies outside of numpy/scipy without very good reasons. For
|
12
|
+
# example, this file should not use sklearn and matplotlib to produce a t-sne
|
13
|
+
# plot of word embeddings or such.
|
14
|
+
|
15
|
+
from __future__ import absolute_import
|
16
|
+
from __future__ import division
|
17
|
+
from __future__ import print_function
|
18
|
+
from __future__ import unicode_literals
|
19
|
+
|
20
|
+
import numpy as np
|
21
|
+
|
22
|
+
|
23
|
+
# TODO: Add example on reproducing model.test with util.test and model.get_line
|
24
|
+
def test(predictions, labels, k=1):
|
25
|
+
"""
|
26
|
+
Return precision and recall modeled after fasttext's test
|
27
|
+
"""
|
28
|
+
precision = 0.0
|
29
|
+
nexamples = 0
|
30
|
+
nlabels = 0
|
31
|
+
for prediction, labels in zip(predictions, labels):
|
32
|
+
for p in prediction:
|
33
|
+
if p in labels:
|
34
|
+
precision += 1
|
35
|
+
nexamples += 1
|
36
|
+
nlabels += len(labels)
|
37
|
+
return (precision / (k * nexamples), precision / nlabels)
|
38
|
+
|
39
|
+
|
40
|
+
def find_nearest_neighbor(query, vectors, ban_set, cossims=None):
    """
    query is a 1d numpy array corresponding to the vector to which you want to
    find the closest vector
    vectors is a 2d numpy array corresponding to the vectors you want to consider
    ban_set is a set of indices within vectors you want to ignore for nearest match
    cossims is a 1d numpy array of size len(vectors), which can be passed for efficiency;
    it is used as the output buffer of the matrix product and is overwritten

    returns the index of the closest match to query within vectors

    Raises ValueError if every index of vectors is in ban_set (or vectors is empty).
    """
    if cossims is None:
        cossims = np.matmul(vectors, query)
    else:
        np.matmul(vectors, query, out=cossims)
    # Walk the scores from the largest downwards until an index that is not
    # banned is found. Stopping at rank 0 prevents negative ranks, which numpy
    # would silently wrap around to the top of the ordering again.
    rank = len(cossims) - 1
    while rank >= 0:
        result_i = np.argpartition(cossims, rank)[rank]
        if result_i not in ban_set:
            return result_i
        rank -= 1
    raise ValueError("no candidate vector left: every index is in ban_set")
|
@@ -0,0 +1,40 @@
|
|
1
|
+
myshuf() {
|
2
|
+
perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
|
3
|
+
}
|
4
|
+
|
5
|
+
# Filter stdin into fastText training lines on stdout:
#   1. lowercase everything;
#   2. prefix every line with "__label__";
#   3. pad ' . , ( ) ! ? with spaces, delete double quotes, and replace
#      "<br />", ";" and ":" with a space;
#   4. squeeze runs of spaces;
#   5. shuffle the resulting lines with myshuf.
normalize_text() {
  tr '[:upper:]' '[:lower:]' \
    | sed -e 's/^/__label__/g' \
    | sed \
        -e "s/'/ ' /g" \
        -e 's/"//g' \
        -e 's/\./ \. /g' \
        -e 's/<br \/>/ /g' \
        -e 's/,/ , /g' \
        -e 's/(/ ( /g' \
        -e 's/)/ ) /g' \
        -e 's/\!/ \! /g' \
        -e 's/\?/ \? /g' \
        -e 's/\;/ /g' \
        -e 's/\:/ /g' \
    | tr -s " " \
    | myshuf
}
|
11
|
+
|
12
|
+
# Abort on the first failing command so that a failed download, extraction or
# build never falls through to training on missing data.
set -e

RESULTDIR=result
DATADIR=data

mkdir -p "${RESULTDIR}"
mkdir -p "${DATADIR}"

# Download and normalise DBpedia only when the prepared files are missing.
if [ ! -f "${DATADIR}/dbpedia.train" ] || [ ! -f "${DATADIR}/dbpedia.test" ]
then
  wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "${DATADIR}/dbpedia_csv.tar.gz"
  tar -xzvf "${DATADIR}/dbpedia_csv.tar.gz" -C "${DATADIR}"
  # Write to temporary names and rename at the end: an interrupted run must not
  # leave a truncated dbpedia.train behind that the check above would accept.
  normalize_text < "${DATADIR}/dbpedia_csv/train.csv" > "${DATADIR}/dbpedia.train.tmp"
  normalize_text < "${DATADIR}/dbpedia_csv/test.csv" > "${DATADIR}/dbpedia.test.tmp"
  mv "${DATADIR}/dbpedia.test.tmp" "${DATADIR}/dbpedia.test"
  mv "${DATADIR}/dbpedia.train.tmp" "${DATADIR}/dbpedia.train"
fi

make

echo "Training..."
./fasttext supervised -input "${DATADIR}/dbpedia.train" -output "${RESULTDIR}/dbpedia" -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 4

echo "Quantizing..."
./fasttext quantize -output "${RESULTDIR}/dbpedia" -input "${DATADIR}/dbpedia.train" -qnorm -retrain -epoch 1 -cutoff 100000

echo "Testing original model..."
./fasttext test "${RESULTDIR}/dbpedia.bin" "${DATADIR}/dbpedia.test"
echo "Testing quantized model..."
./fasttext test "${RESULTDIR}/dbpedia.ftz" "${DATADIR}/dbpedia.test"

# Report the on-disk size (in bytes) of both models for comparison.
wc -c < "${RESULTDIR}/dbpedia.bin" | awk '{print "Size of the original model:\t",$1;}'
wc -c < "${RESULTDIR}/dbpedia.ftz" | awk '{print "Size of the quantized model:\t",$1;}'
|
@@ -0,0 +1,60 @@
|
|
1
|
+
#!/usr/bin/env python
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
#
|
4
|
+
# Copyright (c) 2016-present, Facebook, Inc.
|
5
|
+
# All rights reserved.
|
6
|
+
#
|
7
|
+
# This source code is licensed under the MIT license found in the
|
8
|
+
# LICENSE file in the root directory of this source tree.
|
9
|
+
#
|
10
|
+
|
11
|
+
# To run the integration tests you must first fetch all the required test data.
|
12
|
+
# Have a look at tests/fetch_test_data.sh
|
13
|
+
# You will then need to point this script to the corresponding folder
|
14
|
+
|
15
|
+
from __future__ import absolute_import
|
16
|
+
from __future__ import division
|
17
|
+
from __future__ import print_function
|
18
|
+
from __future__ import unicode_literals
|
19
|
+
|
20
|
+
import unittest
|
21
|
+
import argparse
|
22
|
+
from fasttext.tests import gen_tests
|
23
|
+
from fasttext.tests import gen_unit_tests
|
24
|
+
|
25
|
+
|
26
|
+
def run_tests(tests):
|
27
|
+
suite = unittest.TestLoader().loadTestsFromTestCase(tests)
|
28
|
+
unittest.TextTestRunner(verbosity=3).run(suite)
|
29
|
+
|
30
|
+
|
31
|
+
if __name__ == "__main__":
|
32
|
+
parser = argparse.ArgumentParser()
|
33
|
+
parser.add_argument(
|
34
|
+
"-u", "--unit-tests", help="run unit tests", action="store_true"
|
35
|
+
)
|
36
|
+
parser.add_argument(
|
37
|
+
"-i",
|
38
|
+
"--integration-tests",
|
39
|
+
help="run integration tests",
|
40
|
+
action="store_true"
|
41
|
+
)
|
42
|
+
parser.add_argument(
|
43
|
+
"-v",
|
44
|
+
"--verbose",
|
45
|
+
default=1,
|
46
|
+
help="verbosity level (default 1)",
|
47
|
+
type=int,
|
48
|
+
)
|
49
|
+
parser.add_argument("--data-dir", help="Full path to data directory")
|
50
|
+
args = parser.parse_args()
|
51
|
+
if args.unit_tests:
|
52
|
+
run_tests(gen_unit_tests(verbose=args.verbose))
|
53
|
+
if args.integration_tests:
|
54
|
+
if args.data_dir is None:
|
55
|
+
raise ValueError(
|
56
|
+
"Need data directory! Consult tests/fetch_test_data.sh"
|
57
|
+
)
|
58
|
+
run_tests(gen_tests(args.data_dir, verbose=args.verbose))
|
59
|
+
if not args.unit_tests and not args.integration_tests:
|
60
|
+
print("Ran no tests")
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Fast Linear Model for Knowledge Graph Embeddings
|
2
|
+
|
3
|
+
## Knowledge base completion
|
4
|
+
|
5
|
+
These scripts require the [fastText library](https://github.com/facebookresearch/fastText).
|
6
|
+
|
7
|
+
Run the data.sh script to download and format the datasets. Then run any of the scripts to train and test on a given dataset.
|
8
|
+
|
9
|
+
## Reference
|
10
|
+
|
11
|
+
If you use this code please cite:
|
12
|
+
|
13
|
+
@article{joulin2017fast,
|
14
|
+
title={Fast Linear Model for Knowledge Graph Embeddings},
|
15
|
+
author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Nickel, Maximilian and Mikolov, Tomas},
|
16
|
+
journal={arXiv preprint arXiv:1710.10881},
|
17
|
+
year={2017}
|
18
|
+
}
|
19
|
+
|
@@ -0,0 +1,69 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
#
|
3
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
+
# All rights reserved.
|
5
|
+
#
|
6
|
+
# This source code is licensed under the MIT license found in the
|
7
|
+
# LICENSE file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
set -e
DATADIR=data/

# fetch URL FILE
# Download URL into FILE, overwriting any previous copy. Naming the output
# explicitly avoids "FILE.1" duplicates on re-runs and keeps server-side names
# such as "fetch.php?media=..." off the filesystem.
fetch() {
  wget -O "$2" "$1"
}

mkdir -p "$DATADIR"
cd "$DATADIR"

echo "preparing WN18"
# Original host (mirrored on GitHub below):
#   https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz
fetch "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/wordnet-mlj12.tar.gz" wordnet-mlj12.tar.gz
tar -xzvf wordnet-mlj12.tar.gz
DIR=wordnet-mlj12
# Drop outputs of a previous run: the ft_* globs below would otherwise pick up
# the old aggregate files and feed them back into the new ones.
rm -f -- "${DIR}"/ft_*
for f in "${DIR}"/wordnet-ml*.txt;
do
  fn="${DIR}/ft_$(basename "$f")"
  # Each input triple ($1 $2 $3) yields two fastText lines, one with field 1
  # as the label and one with field 3 as the label.
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}";
done
cat "${DIR}"/ft_* > "${DIR}/ft_wordnet-mlj12-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_wordnet-mlj12-valid+train.txt"

echo "preparing FB15K"
# Original host (mirrored on GitHub below):
#   https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:fb15k.tgz
fetch "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/fb15k.tgz" fb15k.tgz
tar -xzvf fb15k.tgz
DIR=FB15k/
rm -f -- "${DIR}"/ft_*
for f in "${DIR}"/freebase*.txt;
do
  fn="${DIR}/ft_$(basename "$f")"
  echo "$f" " --> " "$fn"
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}";
done
cat "${DIR}"/ft_* > "${DIR}/ft_freebase_mtr100_mte100-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt"

echo "preparing FB15K-237"
fetch "https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip" FB15K-237.2.zip
# -o: overwrite without prompting so that a re-run does not block on input.
unzip -o FB15K-237.2.zip
DIR=Release/
rm -f -- "${DIR}"/ft_*.txt
for f in train.txt test.txt valid.txt
do
  fn="${DIR}/ft_$(basename "$f")"
  echo "$f" " --> " "$fn"
  # These files are split on tabs explicitly (-F "\t").
  awk -F "\t" '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${DIR}/${f}" > "${fn}";
done
cat "${DIR}"/ft_*.txt > "${DIR}/ft_full.txt"
cat "${DIR}/ft_train.txt" "${DIR}/ft_valid.txt" > "${DIR}/ft_valid+train.txt"

echo "preparing SVO"
# The previous "wget . URL" made wget treat "." as a second URL, which fails
# and, under set -e, aborted the script before the archive was unpacked.
fetch "https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:svo-tensor-dataset.tar.gz" svo-tensor-dataset.tar.gz
tar -xzvf svo-tensor-dataset.tar.gz
DIR=SVO-tensor-dataset
rm -f -- "${DIR}"/ft_*
for f in "${DIR}"/svo_data*.dat;
do
  fn="${DIR}/ft_$(basename "$f")"
  # Fields 1 and 3 become the input tokens and field 2 becomes the label.
  awk '{print "0_"$1,"1_"$3,"__label__"$2;}' < "${f}" > "${fn}";
done
cat "${DIR}"/ft_*train*.dat "${DIR}"/ft_*valid*.dat > "${DIR}/ft_svo_data-valid+train.dat"
|
@@ -0,0 +1,108 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2017-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include <unordered_map>
|
10
|
+
#include <iostream>
|
11
|
+
#include <fstream>
|
12
|
+
#include <string>
|
13
|
+
#include <vector>
|
14
|
+
|
15
|
+
// Token returned by readWord() to mark the end of a line.
std::string EOS = "</s>";

/**
 * Read the next whitespace-delimited token from `in` into `word`.
 *
 * Separators are space, '\n', '\r', '\t', '\v', '\f' and '\0'. A newline
 * reached while no token is pending yields the EOS token; a newline that
 * terminates a token is pushed back so that the following call reports EOS.
 *
 * Returns true when a token (possibly EOS) was stored in `word`, false when
 * the end of the stream was reached without reading any token.
 */
bool readWord(std::istream& in, std::string& word)
{
  typedef std::streambuf::traits_type traits;
  std::streambuf& sb = *in.rdbuf();
  word.clear();
  // sbumpc() returns int_type. Keeping the value in that type before testing
  // for EOF is required: storing it in a plain char made byte 0xFF look like
  // EOF where char is signed, and made EOF undetectable where it is unsigned.
  traits::int_type next;
  while (!traits::eq_int_type(next = sb.sbumpc(), traits::eof())) {
    const char c = traits::to_char_type(next);
    if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' ||
        c == '\f' || c == '\0') {
      if (word.empty()) {
        if (c == '\n') {
          word += EOS;
          return true;
        }
        continue;
      }
      if (c == '\n') {
        sb.sungetc();
      }
      return true;
    }
    word.push_back(c);
  }
  // The buffer is exhausted; read through the stream itself so that its
  // state flags record the end of file.
  in.get();
  return !word.empty();
}
|
42
|
+
|
43
|
+
int main(int argc, char** argv) {
|
44
|
+
int k = 10;
|
45
|
+
if (argc < 4) {
|
46
|
+
std::cerr<<"eval <pred> <gt> <kb> [<k>]"<<std::endl;
|
47
|
+
exit(1);
|
48
|
+
}
|
49
|
+
if (argc == 5) { k = atoi(argv[4]);}
|
50
|
+
|
51
|
+
std::string predfn(argv[1]);
|
52
|
+
std::ifstream predf(predfn);
|
53
|
+
std::string gtfn(argv[2]);
|
54
|
+
std::ifstream gtf(gtfn);
|
55
|
+
std::string kbfn(argv[3]);
|
56
|
+
std::ifstream kbf(kbfn);
|
57
|
+
|
58
|
+
if (!predf.is_open() || !gtf.is_open() || !kbf.is_open()) {
|
59
|
+
std::cerr << "Files cannot be opened!" << std::endl;
|
60
|
+
exit(EXIT_FAILURE);
|
61
|
+
}
|
62
|
+
|
63
|
+
std::unordered_map< std::string,
|
64
|
+
std::unordered_map< std::string, bool > > KB;
|
65
|
+
|
66
|
+
while (kbf.peek() != EOF) {
|
67
|
+
std::string label, key, word;
|
68
|
+
while (readWord(kbf, word)) {
|
69
|
+
if (word == EOS) {break;}
|
70
|
+
if (word.find("__label__") == 0) {label = word;}
|
71
|
+
else {key += "|" + word;}
|
72
|
+
}
|
73
|
+
KB[key][label] = true;
|
74
|
+
}
|
75
|
+
kbf.close();
|
76
|
+
|
77
|
+
double precision = 0.0;
|
78
|
+
int32_t nexamples = 0;
|
79
|
+
while (predf.peek() != EOF || gtf.peek() != EOF) {
|
80
|
+
if (predf.peek() == EOF || gtf.peek() == EOF) {
|
81
|
+
std::cerr<<"pred / gt files have diff sizes"<<std::endl;
|
82
|
+
exit(1);
|
83
|
+
}
|
84
|
+
std::string label, key, word;
|
85
|
+
|
86
|
+
while (readWord(gtf, word)) {
|
87
|
+
if (word == EOS) {break;}
|
88
|
+
if ( word.find("__label__") == 0) {label = word;}
|
89
|
+
else {key += "|" + word;}
|
90
|
+
}
|
91
|
+
if (KB.find(key) == KB.end()) {
|
92
|
+
std::cerr<<"empty key!"<<std::endl; exit(1);
|
93
|
+
}
|
94
|
+
|
95
|
+
int count = 0;bool eval = true;
|
96
|
+
while (readWord(predf, word)) {
|
97
|
+
if (word == EOS) {break;}
|
98
|
+
if (!eval) {continue;}
|
99
|
+
if (label == word) {precision += 1.0; eval = false;}
|
100
|
+
else if (KB[key].find(word) == KB[key].end()) {count++;}
|
101
|
+
if (count == k) {eval = false;}
|
102
|
+
}
|
103
|
+
nexamples++;
|
104
|
+
}
|
105
|
+
predf.close(); gtf.close();
|
106
|
+
std::cout << "N:\t" << nexamples << std::endl;
|
107
|
+
std::cout << "R@" << k << "\t" << precision / nexamples << std::endl;
|
108
|
+
}
|
@@ -0,0 +1,49 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
#
|
3
|
+
# copyright (c) 2017-present, facebook, inc.
|
4
|
+
# all rights reserved.
|
5
|
+
#
|
6
|
+
# this source code is licensed under the MIT license found in the
|
7
|
+
# license file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
# script for FB15k
|
10
|
+
# Stop at the first failing step (build, training, prediction) instead of
# evaluating with a stale or missing model.
set -e

DIR=data/FB15k/
FASTTEXTDIR=../../

# compile
pushd "$FASTTEXTDIR"
make opt
popd
ft="${FASTTEXTDIR}/fasttext"

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:
dim=100
epoch=100
neg=100
model=data/fb15
pred=data/fbpred

echo "---- train ----"
"$ft" supervised -input "${DIR}/ft_freebase_mtr100_mte100-train.txt" \
  -dim "$dim" -epoch "$epoch" -output "$model" -lr .2 -thread 20 -loss ns -neg "$neg" -minCount 0

echo "computing raw hits@10..."
# The raw score is the second field on line 3 of the `fasttext test` output.
"$ft" test "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10="$2}'

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 20000 > "$pred"
# The filtered score is the second field on line 2 of the ./eval output.
./eval "$pred" "${DIR}/ft_freebase_mtr100_mte100-test.txt" "${DIR}/ft_freebase_mtr100_mte100-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'

echo "---- train+val ----"

# -epoch takes ${epoch}; it was previously passed ${dim} (same value in this
# script, so the result is unchanged here).
"$ft" supervised -input "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt" \
  -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0

echo "computing raw hits@10..."
"$ft" test "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10="$2}'

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_freebase_mtr100_mte100-test.txt" "${DIR}/ft_freebase_mtr100_mte100-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
|
@@ -0,0 +1,45 @@
|
|
1
|
+
#!/usr/bin/env bash
|
2
|
+
#
|
3
|
+
# copyright (c) 2017-present, facebook, inc.
|
4
|
+
# all rights reserved.
|
5
|
+
#
|
6
|
+
# this source code is licensed under the MIT license found in the
|
7
|
+
# license file in the root directory of this source tree.
|
8
|
+
#
|
9
|
+
# script for FB15k237
|
10
|
+
# Stop at the first failing step (build, training, prediction) instead of
# evaluating with a stale or missing model.
set -e

DIR=data/Release/
FASTTEXTDIR=../../

# compile

pushd "$FASTTEXTDIR"
make opt
popd
ft="${FASTTEXTDIR}/fasttext"

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:

pred=data/fb237pred
model=data/fb15k237
dim=50
epoch=10
neg=500

echo "---- train ----"
"$ft" supervised -input "${DIR}/ft_train.txt" \
  -dim "$dim" -epoch "$epoch" -output "$model" -lr .2 -thread 20 -loss ns -neg "$neg" -minCount 0

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_test.txt" 20000 > "$pred"
# The filtered score is the second field on line 2 of the ./eval output.
./eval "$pred" "${DIR}/ft_test.txt" "${DIR}/ft_full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'

echo "---- train+val ----"

# -epoch takes ${epoch}. It was previously passed ${dim}, which trained this
# run for 50 epochs although epoch is set to 10 above.
# NOTE(review): confirm 10 epochs is the intended setting for train+val.
"$ft" supervised -input "${DIR}/ft_valid+train.txt" \
  -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_test.txt" "${DIR}/ft_full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
|