fasttext 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +20 -1
- data/lib/fasttext.rb +3 -0
- data/lib/fasttext/classifier.rb +12 -4
- data/lib/fasttext/vectorizer.rb +1 -1
- data/lib/fasttext/version.rb +1 -1
- metadata +4 -473
- data/lib/fasttext/ext.bundle +0 -0
- data/vendor/fastText/CMakeLists.txt +0 -68
- data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
- data/vendor/fastText/CONTRIBUTING.md +0 -32
- data/vendor/fastText/MANIFEST.in +0 -5
- data/vendor/fastText/Makefile +0 -63
- data/vendor/fastText/alignment/README.md +0 -53
- data/vendor/fastText/alignment/align.py +0 -145
- data/vendor/fastText/alignment/eval.py +0 -60
- data/vendor/fastText/alignment/example.sh +0 -51
- data/vendor/fastText/alignment/unsup_align.py +0 -109
- data/vendor/fastText/alignment/utils.py +0 -154
- data/vendor/fastText/classification-example.sh +0 -41
- data/vendor/fastText/classification-results.sh +0 -94
- data/vendor/fastText/crawl/README.md +0 -26
- data/vendor/fastText/crawl/dedup.cc +0 -51
- data/vendor/fastText/crawl/download_crawl.sh +0 -57
- data/vendor/fastText/crawl/filter_dedup.sh +0 -13
- data/vendor/fastText/crawl/filter_utf8.cc +0 -105
- data/vendor/fastText/crawl/process_wet_file.sh +0 -30
- data/vendor/fastText/docs/aligned-vectors.md +0 -64
- data/vendor/fastText/docs/api.md +0 -6
- data/vendor/fastText/docs/cheatsheet.md +0 -66
- data/vendor/fastText/docs/crawl-vectors.md +0 -125
- data/vendor/fastText/docs/dataset.md +0 -6
- data/vendor/fastText/docs/english-vectors.md +0 -53
- data/vendor/fastText/docs/faqs.md +0 -63
- data/vendor/fastText/docs/language-identification.md +0 -47
- data/vendor/fastText/docs/options.md +0 -50
- data/vendor/fastText/docs/pretrained-vectors.md +0 -142
- data/vendor/fastText/docs/python-module.md +0 -314
- data/vendor/fastText/docs/references.md +0 -41
- data/vendor/fastText/docs/supervised-models.md +0 -54
- data/vendor/fastText/docs/supervised-tutorial.md +0 -349
- data/vendor/fastText/docs/support.md +0 -58
- data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
- data/vendor/fastText/eval.py +0 -95
- data/vendor/fastText/get-wikimedia.sh +0 -79
- data/vendor/fastText/python/README.md +0 -322
- data/vendor/fastText/python/README.rst +0 -406
- data/vendor/fastText/python/benchmarks/README.rst +0 -3
- data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
- data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
- data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
- data/vendor/fastText/quantization-example.sh +0 -40
- data/vendor/fastText/runtests.py +0 -60
- data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
- data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
- data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
- data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
- data/vendor/fastText/setup.cfg +0 -2
- data/vendor/fastText/setup.py +0 -203
- data/vendor/fastText/tests/fetch_test_data.sh +0 -202
- data/vendor/fastText/website/README.md +0 -6
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
- data/vendor/fastText/website/core/Footer.js +0 -127
- data/vendor/fastText/website/package.json +0 -12
- data/vendor/fastText/website/pages/en/index.js +0 -286
- data/vendor/fastText/website/sidebars.json +0 -18
- data/vendor/fastText/website/siteConfig.js +0 -102
- data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
- data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
- data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
- data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
- data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
- data/vendor/fastText/website/static/fasttext.css +0 -48
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +0 -57
- data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,13 +0,0 @@
|
|
1
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the MIT license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
from __future__ import absolute_import
|
8
|
-
from __future__ import division
|
9
|
-
from __future__ import print_function
|
10
|
-
from __future__ import unicode_literals
|
11
|
-
|
12
|
-
from .util import test
|
13
|
-
from .util import find_nearest_neighbor
|
@@ -1,60 +0,0 @@
|
|
1
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the MIT license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
# NOTE: The purpose of this file is not to accumulate all useful utility
|
8
|
-
# functions. This file should contain very commonly used and requested functions
|
9
|
-
# (such as test). If you think you have a function at that level, please create
|
10
|
-
# an issue and we will happily review your suggestion. This file is also not supposed
|
11
|
-
# to pull in dependencies outside of numpy/scipy without very good reasons. For
|
12
|
-
# example, this file should not use sklearn and matplotlib to produce a t-sne
|
13
|
-
# plot of word embeddings or such.
|
14
|
-
|
15
|
-
from __future__ import absolute_import
|
16
|
-
from __future__ import division
|
17
|
-
from __future__ import print_function
|
18
|
-
from __future__ import unicode_literals
|
19
|
-
|
20
|
-
import numpy as np
|
21
|
-
|
22
|
-
|
23
|
-
# TODO: Add example on reproducing model.test with util.test and model.get_line
|
24
|
-
def test(predictions, labels, k=1):
|
25
|
-
"""
|
26
|
-
Return precision and recall modeled after fasttext's test
|
27
|
-
"""
|
28
|
-
precision = 0.0
|
29
|
-
nexamples = 0
|
30
|
-
nlabels = 0
|
31
|
-
for prediction, labels in zip(predictions, labels):
|
32
|
-
for p in prediction:
|
33
|
-
if p in labels:
|
34
|
-
precision += 1
|
35
|
-
nexamples += 1
|
36
|
-
nlabels += len(labels)
|
37
|
-
return (precision / (k * nexamples), precision / nlabels)
|
38
|
-
|
39
|
-
|
40
|
-
def find_nearest_neighbor(query, vectors, ban_set, cossims=None):
    """
    query is a 1d numpy array corresponding to the vector to which you want to
    find the closest vector
    vectors is a 2d numpy array corresponding to the vectors you want to consider
    ban_set is a set of indices within vectors you want to ignore for nearest match
    cossims is a 1d numpy array of size len(vectors), which can be passed for efficiency;
    when given it is overwritten with the product of vectors and query

    returns the index of the closest match to query within vectors

    raises ValueError when vectors is empty or every index is in ban_set
    """
    if cossims is None:
        cossims = np.matmul(vectors, query)
    else:
        # Reuse the caller's buffer instead of allocating a new array.
        np.matmul(vectors, query, out=cossims)
    # Walk down from the highest score; argpartition places the rank-th
    # smallest element at position `rank` without a full sort.
    rank = len(cossims) - 1
    while rank >= 0:
        result_i = np.argpartition(cossims, rank)[rank]
        if result_i not in ban_set:
            return result_i
        rank -= 1
    # Without this stop a negative rank would wrap around inside argpartition.
    raise ValueError("no candidate left: every index is in ban_set")
|
@@ -1,40 +0,0 @@
|
|
1
|
-
myshuf() {
|
2
|
-
perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
|
3
|
-
}
|
4
|
-
|
5
|
-
# Filter stdin -> stdout: lowercase every line, prefix it with "__label__",
# pad punctuation with spaces, drop double quotes, turn "<br />", ";" and ":"
# into a space, squeeze runs of spaces, and finally shuffle the lines.
normalize_text() {
  tr '[:upper:]' '[:lower:]' \
    | sed -e 's/^/__label__/g' \
    | sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
        -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
        -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' \
    | tr -s " " \
    | myshuf
}
|
11
|
-
|
12
|
-
# Stop at the first failing step: without this a failed download, extraction
# or build would be ignored and the later steps would run on missing files.
set -e

RESULTDIR=result
DATADIR=data

mkdir -p "${RESULTDIR}"
mkdir -p "${DATADIR}"

# Download and preprocess the DBpedia data only when the training file is
# not there yet.
if [ ! -f "${DATADIR}/dbpedia.train" ]
then
  wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "${DATADIR}/dbpedia_csv.tar.gz"
  tar -xzvf "${DATADIR}/dbpedia_csv.tar.gz" -C "${DATADIR}"
  normalize_text < "${DATADIR}/dbpedia_csv/train.csv" > "${DATADIR}/dbpedia.train"
  normalize_text < "${DATADIR}/dbpedia_csv/test.csv" > "${DATADIR}/dbpedia.test"
fi

# Build the ./fasttext binary used below.
make

echo "Training..."
./fasttext supervised -input "${DATADIR}/dbpedia.train" -output "${RESULTDIR}/dbpedia" -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 4

echo "Quantizing..."
./fasttext quantize -output "${RESULTDIR}/dbpedia" -input "${DATADIR}/dbpedia.train" -qnorm -retrain -epoch 1 -cutoff 100000

echo "Testing original model..."
./fasttext test "${RESULTDIR}/dbpedia.bin" "${DATADIR}/dbpedia.test"
echo "Testing quantized model..."
./fasttext test "${RESULTDIR}/dbpedia.ftz" "${DATADIR}/dbpedia.test"

# Report both model sizes in bytes.
wc -c < "${RESULTDIR}/dbpedia.bin" | awk '{print "Size of the original model:\t",$1;}'
wc -c < "${RESULTDIR}/dbpedia.ftz" | awk '{print "Size of the quantized model:\t",$1;}'
|
data/vendor/fastText/runtests.py
DELETED
@@ -1,60 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
#
|
4
|
-
# Copyright (c) 2016-present, Facebook, Inc.
|
5
|
-
# All rights reserved.
|
6
|
-
#
|
7
|
-
# This source code is licensed under the MIT license found in the
|
8
|
-
# LICENSE file in the root directory of this source tree.
|
9
|
-
#
|
10
|
-
|
11
|
-
# To run the integration tests you must first fetch all the required test data.
|
12
|
-
# Have a look at tests/fetch_test_data.sh
|
13
|
-
# You will then need to point this script to the corresponding folder
|
14
|
-
|
15
|
-
from __future__ import absolute_import
|
16
|
-
from __future__ import division
|
17
|
-
from __future__ import print_function
|
18
|
-
from __future__ import unicode_literals
|
19
|
-
|
20
|
-
import unittest
|
21
|
-
import argparse
|
22
|
-
from fasttext.tests import gen_tests
|
23
|
-
from fasttext.tests import gen_unit_tests
|
24
|
-
|
25
|
-
|
26
|
-
def run_tests(tests):
|
27
|
-
suite = unittest.TestLoader().loadTestsFromTestCase(tests)
|
28
|
-
unittest.TextTestRunner(verbosity=3).run(suite)
|
29
|
-
|
30
|
-
|
31
|
-
if __name__ == "__main__":
|
32
|
-
parser = argparse.ArgumentParser()
|
33
|
-
parser.add_argument(
|
34
|
-
"-u", "--unit-tests", help="run unit tests", action="store_true"
|
35
|
-
)
|
36
|
-
parser.add_argument(
|
37
|
-
"-i",
|
38
|
-
"--integration-tests",
|
39
|
-
help="run integration tests",
|
40
|
-
action="store_true"
|
41
|
-
)
|
42
|
-
parser.add_argument(
|
43
|
-
"-v",
|
44
|
-
"--verbose",
|
45
|
-
default=1,
|
46
|
-
help="verbosity level (default 1)",
|
47
|
-
type=int,
|
48
|
-
)
|
49
|
-
parser.add_argument("--data-dir", help="Full path to data directory")
|
50
|
-
args = parser.parse_args()
|
51
|
-
if args.unit_tests:
|
52
|
-
run_tests(gen_unit_tests(verbose=args.verbose))
|
53
|
-
if args.integration_tests:
|
54
|
-
if args.data_dir is None:
|
55
|
-
raise ValueError(
|
56
|
-
"Need data directory! Consult tests/fetch_test_data.sh"
|
57
|
-
)
|
58
|
-
run_tests(gen_tests(args.data_dir, verbose=args.verbose))
|
59
|
-
if not args.unit_tests and not args.integration_tests:
|
60
|
-
print("Ran no tests")
|
@@ -1,19 +0,0 @@
|
|
1
|
-
# Fast Linear Model for Knowledge Graph Embeddings
|
2
|
-
|
3
|
-
## Knowledge base completion
|
4
|
-
|
5
|
-
These scripts require the [fastText library](https://github.com/facebookresearch/fastText).
|
6
|
-
|
7
|
-
Run the data.sh script to download and format the datasets. Then run any of the scripts to train and test on a given dataset.
|
8
|
-
|
9
|
-
## Reference
|
10
|
-
|
11
|
-
If you use this code please cite:
|
12
|
-
|
13
|
-
@article{joulin2017fast,
|
14
|
-
title={Fast Linear Model for Knowledge Graph Embeddings},
|
15
|
-
author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Nickel, Maximilian and Mikolov, Tomas},
|
16
|
-
journal={arXiv preprint arXiv:1710.10881},
|
17
|
-
year={2017}
|
18
|
-
}
|
19
|
-
|
@@ -1,69 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
-
# All rights reserved.
|
5
|
-
#
|
6
|
-
# This source code is licensed under the MIT license found in the
|
7
|
-
# LICENSE file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
# Download the knowledge-base datasets into data/ and convert each one into
# fastText's supervised format (one "__label__" token per line).
set -e
DATADIR=data/

# mkdir -p is a no-op when the directory already exists.
mkdir -p "$DATADIR"

cd "$DATADIR"
echo "preparing WN18"
#wget -P . https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz
#mv fetch.php\?media\=en\:wordnet-mlj12.tar.gz wordnet-mlj12.tar.gz
wget -P . https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/wordnet-mlj12.tar.gz
tar -xzvf wordnet-mlj12.tar.gz
DIR=wordnet-mlj12
for f in "${DIR}"/wordnet-ml*.txt
do
  fn="${DIR}/ft_$(basename "$f")"
  # Each triple yields two examples: one labelled with field 1 and one
  # labelled with field 3.
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_* > "${DIR}/ft_wordnet-mlj12-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_wordnet-mlj12-valid+train.txt"

echo "preparing FB15K"
#wget https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:fb15k.tgz
#mv fetch.php\?media\=en\:fb15k.tgz fb15k.tgz
wget https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/fb15k.tgz
tar -xzvf fb15k.tgz
DIR=FB15k/
for f in "${DIR}"/freebase*.txt
do
  fn="${DIR}/ft_$(basename "$f")"
  echo "$f" " --> " "$fn"
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_* > "${DIR}/ft_freebase_mtr100_mte100-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt"

echo "preparing FB15K-237"
wget https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip
unzip FB15K-237.2.zip
DIR=Release/
for f in train.txt test.txt valid.txt
do
  fn="${DIR}/ft_$(basename "$f")"
  echo "$f" " --> " "$fn"
  awk -F "\t" '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${DIR}/${f}" > "${fn}"
done
cat "${DIR}"/ft_*.txt > "${DIR}/ft_full.txt"
cat "${DIR}/ft_train.txt" "${DIR}/ft_valid.txt" > "${DIR}/ft_valid+train.txt"

echo "preparing SVO"
# The URL is quoted because it contains "?", and the archive is saved
# straight to its final name. The earlier "wget . <url>" made wget treat "."
# as a second URL; that fetch failed and, under set -e, aborted the script.
wget -O svo-tensor-dataset.tar.gz "https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:svo-tensor-dataset.tar.gz"
tar -xzvf svo-tensor-dataset.tar.gz
DIR=SVO-tensor-dataset
for f in "${DIR}"/svo_data*.dat
do
  fn="${DIR}/ft_$(basename "$f")"
  # Field 2 becomes the label; fields 1 and 3 are the input tokens.
  awk '{print "0_"$1,"1_"$3,"__label__"$2;}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_*train*.dat "${DIR}"/ft_*valid*.dat > "${DIR}/ft_svo_data-valid+train.dat"
|
@@ -1,108 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) 2017-present, Facebook, Inc.
|
3
|
-
* All rights reserved.
|
4
|
-
*
|
5
|
-
* This source code is licensed under the MIT license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree.
|
7
|
-
*/
|
8
|
-
|
9
|
-
#include <unordered_map>
|
10
|
-
#include <iostream>
|
11
|
-
#include <fstream>
|
12
|
-
#include <string>
|
13
|
-
#include <vector>
|
14
|
-
|
15
|
-
// Marker returned by readWord() for an end of line.
std::string EOS = "</s>";

/**
 * Read the next whitespace-delimited token from `in` into `word`.
 *
 * Separators are space, '\n', '\r', '\t', '\v', '\f' and '\0'. A newline met
 * before any token character yields EOS. A newline that ends a token is
 * pushed back, so the following call returns EOS.
 *
 * Returns true when a token (or EOS) was stored in `word`, false when the
 * input ended before any token character was read.
 */
bool readWord(std::istream& in, std::string& word)
{
  typedef std::streambuf::traits_type traits;
  std::streambuf& sb = *in.rdbuf();
  word.clear();
  // Keep sbumpc()'s result as int_type. Narrowing it to char before the EOF
  // test makes byte 0xFF look like end of file where char is signed, and
  // makes end of file undetectable where char is unsigned.
  traits::int_type ci;
  while (!traits::eq_int_type(ci = sb.sbumpc(), traits::eof())) {
    const char c = traits::to_char_type(ci);
    if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' ||
        c == '\f' || c == '\0') {
      if (word.empty()) {
        if (c == '\n') {
          word += EOS;
          return true;
        }
        continue;
      } else {
        if (c == '\n')
          sb.sungetc();
        return true;
      }
    }
    word.push_back(c);
  }
  // The loop read the buffer directly; read once through the stream so its
  // state flags reflect the end of input.
  in.get();
  return !word.empty();
}
|
42
|
-
|
43
|
-
int main(int argc, char** argv) {
|
44
|
-
int k = 10;
|
45
|
-
if (argc < 4) {
|
46
|
-
std::cerr<<"eval <pred> <gt> <kb> [<k>]"<<std::endl;
|
47
|
-
exit(1);
|
48
|
-
}
|
49
|
-
if (argc == 5) { k = atoi(argv[4]);}
|
50
|
-
|
51
|
-
std::string predfn(argv[1]);
|
52
|
-
std::ifstream predf(predfn);
|
53
|
-
std::string gtfn(argv[2]);
|
54
|
-
std::ifstream gtf(gtfn);
|
55
|
-
std::string kbfn(argv[3]);
|
56
|
-
std::ifstream kbf(kbfn);
|
57
|
-
|
58
|
-
if (!predf.is_open() || !gtf.is_open() || !kbf.is_open()) {
|
59
|
-
std::cerr << "Files cannot be opened!" << std::endl;
|
60
|
-
exit(EXIT_FAILURE);
|
61
|
-
}
|
62
|
-
|
63
|
-
std::unordered_map< std::string,
|
64
|
-
std::unordered_map< std::string, bool > > KB;
|
65
|
-
|
66
|
-
while (kbf.peek() != EOF) {
|
67
|
-
std::string label, key, word;
|
68
|
-
while (readWord(kbf, word)) {
|
69
|
-
if (word == EOS) {break;}
|
70
|
-
if (word.find("__label__") == 0) {label = word;}
|
71
|
-
else {key += "|" + word;}
|
72
|
-
}
|
73
|
-
KB[key][label] = true;
|
74
|
-
}
|
75
|
-
kbf.close();
|
76
|
-
|
77
|
-
double precision = 0.0;
|
78
|
-
int32_t nexamples = 0;
|
79
|
-
while (predf.peek() != EOF || gtf.peek() != EOF) {
|
80
|
-
if (predf.peek() == EOF || gtf.peek() == EOF) {
|
81
|
-
std::cerr<<"pred / gt files have diff sizes"<<std::endl;
|
82
|
-
exit(1);
|
83
|
-
}
|
84
|
-
std::string label, key, word;
|
85
|
-
|
86
|
-
while (readWord(gtf, word)) {
|
87
|
-
if (word == EOS) {break;}
|
88
|
-
if ( word.find("__label__") == 0) {label = word;}
|
89
|
-
else {key += "|" + word;}
|
90
|
-
}
|
91
|
-
if (KB.find(key) == KB.end()) {
|
92
|
-
std::cerr<<"empty key!"<<std::endl; exit(1);
|
93
|
-
}
|
94
|
-
|
95
|
-
int count = 0;bool eval = true;
|
96
|
-
while (readWord(predf, word)) {
|
97
|
-
if (word == EOS) {break;}
|
98
|
-
if (!eval) {continue;}
|
99
|
-
if (label == word) {precision += 1.0; eval = false;}
|
100
|
-
else if (KB[key].find(word) == KB[key].end()) {count++;}
|
101
|
-
if (count == k) {eval = false;}
|
102
|
-
}
|
103
|
-
nexamples++;
|
104
|
-
}
|
105
|
-
predf.close(); gtf.close();
|
106
|
-
std::cout << "N:\t" << nexamples << std::endl;
|
107
|
-
std::cout << "R@" << k << "\t" << precision / nexamples << std::endl;
|
108
|
-
}
|
@@ -1,49 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# copyright (c) 2017-present, facebook, inc.
|
4
|
-
# all rights reserved.
|
5
|
-
#
|
6
|
-
# this source code is licensed under the MIT license found in the
|
7
|
-
# license file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
# script for FB15k
|
10
|
-
DIR=data/FB15k/
FASTTEXTDIR=../../

# compile
# Stop if the fastText directory is missing; otherwise "make opt" would run
# in the wrong directory.
pushd "$FASTTEXTDIR" || exit 1
make opt
popd || exit 1
ft=${FASTTEXTDIR}/fasttext

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:
dim=100
epoch=100
neg=100
model=data/fb15
pred=data/fbpred

echo "---- train ----"
"$ft" supervised -input "${DIR}/ft_freebase_mtr100_mte100-train.txt" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20 -loss ns -neg "$neg" -minCount 0

echo "computing raw hits@10..."
"$ft" test "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10="$2}'

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_freebase_mtr100_mte100-test.txt" "${DIR}/ft_freebase_mtr100_mte100-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'

echo "---- train+val ----"

# Use the epoch setting here; this command previously passed ${dim} as the
# epoch count.
"$ft" supervised -input "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt" \
  -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0

echo "computing raw hits@10..."
"$ft" test "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10="$2}'

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_freebase_mtr100_mte100-test.txt" "${DIR}/ft_freebase_mtr100_mte100-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
|
@@ -1,45 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# copyright (c) 2017-present, facebook, inc.
|
4
|
-
# all rights reserved.
|
5
|
-
#
|
6
|
-
# this source code is licensed under the MIT license found in the
|
7
|
-
# license file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
# script for FB15k237
|
10
|
-
DIR=data/Release/
FASTTEXTDIR=../../

# compile

# Stop if the fastText directory is missing; otherwise "make opt" would run
# in the wrong directory.
pushd "$FASTTEXTDIR" || exit 1
make opt
popd || exit 1
ft=${FASTTEXTDIR}/fasttext

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:

pred=data/fb237pred
model=data/fb15k237
dim=50
epoch=10
neg=500

echo "---- train ----"
"$ft" supervised -input "${DIR}/ft_train.txt" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20 -loss ns -neg "$neg" -minCount 0

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_test.txt" "${DIR}/ft_full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'

echo "---- train+val ----"

# Use the epoch setting here; this command previously passed ${dim} as the
# epoch count, which ran 50 epochs instead of the configured 10.
"$ft" supervised -input "${DIR}/ft_valid+train.txt" \
  -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_test.txt" "${DIR}/ft_full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
|