fasttext 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +20 -1
- data/lib/fasttext.rb +3 -0
- data/lib/fasttext/classifier.rb +12 -4
- data/lib/fasttext/vectorizer.rb +1 -1
- data/lib/fasttext/version.rb +1 -1
- metadata +4 -473
- data/lib/fasttext/ext.bundle +0 -0
- data/vendor/fastText/CMakeLists.txt +0 -68
- data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
- data/vendor/fastText/CONTRIBUTING.md +0 -32
- data/vendor/fastText/MANIFEST.in +0 -5
- data/vendor/fastText/Makefile +0 -63
- data/vendor/fastText/alignment/README.md +0 -53
- data/vendor/fastText/alignment/align.py +0 -145
- data/vendor/fastText/alignment/eval.py +0 -60
- data/vendor/fastText/alignment/example.sh +0 -51
- data/vendor/fastText/alignment/unsup_align.py +0 -109
- data/vendor/fastText/alignment/utils.py +0 -154
- data/vendor/fastText/classification-example.sh +0 -41
- data/vendor/fastText/classification-results.sh +0 -94
- data/vendor/fastText/crawl/README.md +0 -26
- data/vendor/fastText/crawl/dedup.cc +0 -51
- data/vendor/fastText/crawl/download_crawl.sh +0 -57
- data/vendor/fastText/crawl/filter_dedup.sh +0 -13
- data/vendor/fastText/crawl/filter_utf8.cc +0 -105
- data/vendor/fastText/crawl/process_wet_file.sh +0 -30
- data/vendor/fastText/docs/aligned-vectors.md +0 -64
- data/vendor/fastText/docs/api.md +0 -6
- data/vendor/fastText/docs/cheatsheet.md +0 -66
- data/vendor/fastText/docs/crawl-vectors.md +0 -125
- data/vendor/fastText/docs/dataset.md +0 -6
- data/vendor/fastText/docs/english-vectors.md +0 -53
- data/vendor/fastText/docs/faqs.md +0 -63
- data/vendor/fastText/docs/language-identification.md +0 -47
- data/vendor/fastText/docs/options.md +0 -50
- data/vendor/fastText/docs/pretrained-vectors.md +0 -142
- data/vendor/fastText/docs/python-module.md +0 -314
- data/vendor/fastText/docs/references.md +0 -41
- data/vendor/fastText/docs/supervised-models.md +0 -54
- data/vendor/fastText/docs/supervised-tutorial.md +0 -349
- data/vendor/fastText/docs/support.md +0 -58
- data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
- data/vendor/fastText/eval.py +0 -95
- data/vendor/fastText/get-wikimedia.sh +0 -79
- data/vendor/fastText/python/README.md +0 -322
- data/vendor/fastText/python/README.rst +0 -406
- data/vendor/fastText/python/benchmarks/README.rst +0 -3
- data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
- data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
- data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
- data/vendor/fastText/quantization-example.sh +0 -40
- data/vendor/fastText/runtests.py +0 -60
- data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
- data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
- data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
- data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
- data/vendor/fastText/setup.cfg +0 -2
- data/vendor/fastText/setup.py +0 -203
- data/vendor/fastText/tests/fetch_test_data.sh +0 -202
- data/vendor/fastText/website/README.md +0 -6
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
- data/vendor/fastText/website/core/Footer.js +0 -127
- data/vendor/fastText/website/package.json +0 -12
- data/vendor/fastText/website/pages/en/index.js +0 -286
- data/vendor/fastText/website/sidebars.json +0 -18
- data/vendor/fastText/website/siteConfig.js +0 -102
- data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
- data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
- data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
- data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
- data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
- data/vendor/fastText/website/static/fasttext.css +0 -48
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +0 -57
- data/vendor/fastText/word-vector-example.sh +0 -39
data/lib/fasttext/ext.bundle
DELETED
Binary file
|
@@ -1,68 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2016-present, Facebook, Inc.
|
3
|
-
# All rights reserved.
|
4
|
-
#
|
5
|
-
# This source code is licensed under the MIT license found in the
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
7
|
-
#
|
8
|
-
|
9
|
-
cmake_minimum_required(VERSION 2.8.9)
|
10
|
-
project(fasttext)
|
11
|
-
|
12
|
-
# The version number.
|
13
|
-
set (fasttext_VERSION_MAJOR 0)
|
14
|
-
set (fasttext_VERSION_MINOR 1)
|
15
|
-
|
16
|
-
include_directories(fasttext)
|
17
|
-
|
18
|
-
set(CMAKE_CXX_FLAGS " -pthread -std=c++11 -funroll-loops -O3 -march=native")
|
19
|
-
|
20
|
-
set(HEADER_FILES
|
21
|
-
src/args.h
|
22
|
-
src/densematrix.h
|
23
|
-
src/dictionary.h
|
24
|
-
src/fasttext.h
|
25
|
-
src/loss.h
|
26
|
-
src/matrix.h
|
27
|
-
src/meter.h
|
28
|
-
src/model.h
|
29
|
-
src/productquantizer.h
|
30
|
-
src/quantmatrix.h
|
31
|
-
src/real.h
|
32
|
-
src/utils.h
|
33
|
-
src/vector.h)
|
34
|
-
|
35
|
-
set(SOURCE_FILES
|
36
|
-
src/args.cc
|
37
|
-
src/densematrix.cc
|
38
|
-
src/dictionary.cc
|
39
|
-
src/fasttext.cc
|
40
|
-
src/loss.cc
|
41
|
-
src/main.cc
|
42
|
-
src/matrix.cc
|
43
|
-
src/meter.cc
|
44
|
-
src/model.cc
|
45
|
-
src/productquantizer.cc
|
46
|
-
src/quantmatrix.cc
|
47
|
-
src/utils.cc
|
48
|
-
src/vector.cc)
|
49
|
-
|
50
|
-
add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
|
51
|
-
add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
|
52
|
-
add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
|
53
|
-
set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
|
54
|
-
set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
|
55
|
-
set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
|
56
|
-
POSITION_INDEPENDENT_CODE True)
|
57
|
-
add_executable(fasttext-bin src/main.cc)
|
58
|
-
target_link_libraries(fasttext-bin pthread fasttext-static)
|
59
|
-
set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
|
60
|
-
install (TARGETS fasttext-shared
|
61
|
-
LIBRARY DESTINATION lib)
|
62
|
-
install (TARGETS fasttext-static
|
63
|
-
ARCHIVE DESTINATION lib)
|
64
|
-
install (TARGETS fasttext-static_pic
|
65
|
-
ARCHIVE DESTINATION lib)
|
66
|
-
install (TARGETS fasttext-bin
|
67
|
-
RUNTIME DESTINATION bin
|
68
|
-
PUBLIC_HEADER DESTINATION include/fasttext)
|
@@ -1,32 +0,0 @@
|
|
1
|
-
# Contributing to fastText
|
2
|
-
We want to make contributing to this project as easy and transparent as possible.
|
3
|
-
|
4
|
-
## Issues
|
5
|
-
We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue.
|
6
|
-
|
7
|
-
### Reproducing issues
|
8
|
-
Please make sure that the issue you mention is not a result of one of the existing third-party libraries. For example, please do not post an issue if you encountered an error within a third-party Python library. We can only help you with errors which can be directly reproduced either with our C++ code or the corresponding Python bindings. If you do find an error, please post detailed steps to reproduce it. If we can't reproduce your error, we can't help you fix it.
|
9
|
-
|
10
|
-
## Pull Requests
|
11
|
-
Please post an Issue before submitting a pull request. This might save you some time as it is possible we can't support your contribution, albeit we try our best to accommodate your (planned) work and highly appreciate your time. Generally, it is best to have a pull request emerge from an issue rather than the other way around.
|
12
|
-
|
13
|
-
To create a pull request:
|
14
|
-
|
15
|
-
1. Fork the repo and create your branch from `master`.
|
16
|
-
2. If you've added code that should be tested, add tests.
|
17
|
-
3. If you've changed APIs, update the documentation.
|
18
|
-
4. Ensure the test suite passes.
|
19
|
-
5. Make sure your code lints.
|
20
|
-
6. If you haven't already, complete the Contributor License Agreement ("CLA").
|
21
|
-
|
22
|
-
## Tests
|
23
|
-
First, you will need to make sure you have the required data. For that, please have a look at the fetch_test_data.sh script under tests. Next, run the tests using the runtests.py script, passing a path to the directory containing the datasets.
|
24
|
-
|
25
|
-
## Contributor License Agreement ("CLA")
|
26
|
-
In order to accept your pull request, we need you to submit a CLA. You only need
|
27
|
-
to do this once to work on any of Facebook's open source projects.
|
28
|
-
|
29
|
-
Complete your CLA here: <https://code.facebook.com/cla>
|
30
|
-
|
31
|
-
## License
|
32
|
-
By contributing to fastText, you agree that your contributions will be licensed under its MIT license.
|
data/vendor/fastText/MANIFEST.in
DELETED
data/vendor/fastText/Makefile
DELETED
@@ -1,63 +0,0 @@
|
|
1
|
-
#
|
2
|
-
# Copyright (c) 2016-present, Facebook, Inc.
|
3
|
-
# All rights reserved.
|
4
|
-
#
|
5
|
-
# This source code is licensed under the MIT license found in the
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
7
|
-
#
|
8
|
-
|
9
|
-
CXX = c++
|
10
|
-
CXXFLAGS = -pthread -std=c++0x -march=native
|
11
|
-
OBJS = args.o matrix.o dictionary.o loss.o productquantizer.o densematrix.o quantmatrix.o vector.o model.o utils.o meter.o fasttext.o
|
12
|
-
INCLUDES = -I.
|
13
|
-
|
14
|
-
opt: CXXFLAGS += -O3 -funroll-loops -DNDEBUG
|
15
|
-
opt: fasttext
|
16
|
-
|
17
|
-
coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage
|
18
|
-
coverage: fasttext
|
19
|
-
|
20
|
-
debug: CXXFLAGS += -g -O0 -fno-inline
|
21
|
-
debug: fasttext
|
22
|
-
|
23
|
-
args.o: src/args.cc src/args.h
|
24
|
-
$(CXX) $(CXXFLAGS) -c src/args.cc
|
25
|
-
|
26
|
-
matrix.o: src/matrix.cc src/matrix.h
|
27
|
-
$(CXX) $(CXXFLAGS) -c src/matrix.cc
|
28
|
-
|
29
|
-
dictionary.o: src/dictionary.cc src/dictionary.h src/args.h
|
30
|
-
$(CXX) $(CXXFLAGS) -c src/dictionary.cc
|
31
|
-
|
32
|
-
loss.o: src/loss.cc src/loss.h src/matrix.h src/real.h
|
33
|
-
$(CXX) $(CXXFLAGS) -c src/loss.cc
|
34
|
-
|
35
|
-
productquantizer.o: src/productquantizer.cc src/productquantizer.h src/utils.h
|
36
|
-
$(CXX) $(CXXFLAGS) -c src/productquantizer.cc
|
37
|
-
|
38
|
-
densematrix.o: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
|
39
|
-
$(CXX) $(CXXFLAGS) -c src/densematrix.cc
|
40
|
-
|
41
|
-
quantmatrix.o: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
|
42
|
-
$(CXX) $(CXXFLAGS) -c src/quantmatrix.cc
|
43
|
-
|
44
|
-
vector.o: src/vector.cc src/vector.h src/utils.h
|
45
|
-
$(CXX) $(CXXFLAGS) -c src/vector.cc
|
46
|
-
|
47
|
-
model.o: src/model.cc src/model.h src/args.h
|
48
|
-
$(CXX) $(CXXFLAGS) -c src/model.cc
|
49
|
-
|
50
|
-
utils.o: src/utils.cc src/utils.h
|
51
|
-
$(CXX) $(CXXFLAGS) -c src/utils.cc
|
52
|
-
|
53
|
-
meter.o: src/meter.cc src/meter.h
|
54
|
-
$(CXX) $(CXXFLAGS) -c src/meter.cc
|
55
|
-
|
56
|
-
fasttext.o: src/fasttext.cc src/*.h
|
57
|
-
$(CXX) $(CXXFLAGS) -c src/fasttext.cc
|
58
|
-
|
59
|
-
fasttext: $(OBJS) src/fasttext.cc
|
60
|
-
$(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
|
61
|
-
|
62
|
-
clean:
|
63
|
-
rm -rf *.o *.gcno *.gcda fasttext
|
@@ -1,53 +0,0 @@
|
|
1
|
-
## Alignment of Word Embeddings
|
2
|
-
|
3
|
-
This directory provides code for learning alignments between word embeddings in different languages.
|
4
|
-
|
5
|
-
The code is in Python 3 and requires [NumPy](http://www.numpy.org/).
|
6
|
-
|
7
|
-
The script `example.sh` shows how to use this code to learn and evaluate a bilingual alignment of word embeddings.
|
8
|
-
|
9
|
-
The word embeddings used in [1] can be found on the [fastText project page](https://fasttext.cc) and the supervised bilingual lexicons on the [MUSE project page](https://github.com/facebookresearch/MUSE).
|
10
|
-
|
11
|
-
### Supervised alignment
|
12
|
-
|
13
|
-
The script `align.py` aligns word embeddings from two languages using a bilingual lexicon as supervision.
|
14
|
-
The details of this approach can be found in [1].
|
15
|
-
|
16
|
-
### Unsupervised alignment
|
17
|
-
|
18
|
-
The script `unsup_align.py` aligns word embeddings from two languages without requiring any supervision.
|
19
|
-
The details of this approach can be found in [2].
|
20
|
-
|
21
|
-
In addition to NumPy, the unsupervised method requires the [Python Optimal Transport](https://pot.readthedocs.io/en/stable/) toolbox.
|
22
|
-
|
23
|
-
### Download
|
24
|
-
|
25
|
-
Wikipedia fastText embeddings aligned with our method can be found [here](https://fasttext.cc/docs/en/aligned-vectors.html).
|
26
|
-
|
27
|
-
### References
|
28
|
-
|
29
|
-
If you use the supervised alignment method, please cite:
|
30
|
-
|
31
|
-
[1] A. Joulin, P. Bojanowski, T. Mikolov, H. Jegou, E. Grave, [*Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion*](https://arxiv.org/abs/1804.07745)
|
32
|
-
|
33
|
-
```
|
34
|
-
@InProceedings{joulin2018loss,
|
35
|
-
title={Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion},
|
36
|
-
author={Joulin, Armand and Bojanowski, Piotr and Mikolov, Tomas and J\'egou, Herv\'e and Grave, Edouard},
|
37
|
-
year={2018},
|
38
|
-
booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
|
39
|
-
}
|
40
|
-
```
|
41
|
-
|
42
|
-
If you use the unsupervised alignment method, please cite:
|
43
|
-
|
44
|
-
[2] E. Grave, A. Joulin, Q. Berthet, [*Unsupervised Alignment of Embeddings with Wasserstein Procrustes*](https://arxiv.org/abs/1805.11222)
|
45
|
-
|
46
|
-
```
|
47
|
-
@article{grave2018unsupervised,
|
48
|
-
title={Unsupervised Alignment of Embeddings with Wasserstein Procrustes},
|
49
|
-
author={Grave, Edouard and Joulin, Armand and Berthet, Quentin},
|
50
|
-
journal={arXiv preprint arXiv:1805.11222},
|
51
|
-
year={2018}
|
52
|
-
}
|
53
|
-
```
|
@@ -1,145 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
#
|
4
|
-
# Copyright (c) 2018-present, Facebook, Inc.
|
5
|
-
# All rights reserved.
|
6
|
-
#
|
7
|
-
# This source code is licensed under the license found in the
|
8
|
-
# LICENSE file in the root directory of this source tree.
|
9
|
-
|
10
|
-
import numpy as np
|
11
|
-
import argparse
|
12
|
-
from utils import *
|
13
|
-
import sys
|
14
|
-
|
15
|
-
parser = argparse.ArgumentParser(description='RCSLS for supervised word alignment')
|
16
|
-
|
17
|
-
parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
|
18
|
-
parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
|
19
|
-
parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
|
20
|
-
|
21
|
-
parser.add_argument("--dico_train", type=str, default='', help="train dictionary")
|
22
|
-
parser.add_argument("--dico_test", type=str, default='', help="validation dictionary")
|
23
|
-
|
24
|
-
parser.add_argument("--output", type=str, default='', help="where to save aligned embeddings")
|
25
|
-
|
26
|
-
parser.add_argument("--knn", type=int, default=10, help="number of nearest neighbors in RCSL/CSLS")
|
27
|
-
parser.add_argument("--maxneg", type=int, default=200000, help="Maximum number of negatives for the Extended RCSLS")
|
28
|
-
parser.add_argument("--maxsup", type=int, default=-1, help="Maximum number of training examples")
|
29
|
-
parser.add_argument("--maxload", type=int, default=200000, help="Maximum number of loaded vectors")
|
30
|
-
|
31
|
-
parser.add_argument("--model", type=str, default="none", help="Set of constraints: spectral or none")
|
32
|
-
parser.add_argument("--reg", type=float, default=0.0 , help='regularization parameters')
|
33
|
-
|
34
|
-
parser.add_argument("--lr", type=float, default=1.0, help='learning rate')
|
35
|
-
parser.add_argument("--niter", type=int, default=10, help='number of iterations')
|
36
|
-
parser.add_argument('--sgd', action='store_true', help='use sgd')
|
37
|
-
parser.add_argument("--batchsize", type=int, default=10000, help="batch size for sgd")
|
38
|
-
|
39
|
-
params = parser.parse_args()
|
40
|
-
|
41
|
-
###### SPECIFIC FUNCTIONS ######
|
42
|
-
# functions specific to RCSLS
|
43
|
-
# the rest of the functions are in utils.py
|
44
|
-
|
45
|
-
def getknn(sc, x, y, k=10):
|
46
|
-
sidx = np.argpartition(sc, -k, axis=1)[:, -k:]
|
47
|
-
ytopk = y[sidx.flatten(), :]
|
48
|
-
ytopk = ytopk.reshape(sidx.shape[0], sidx.shape[1], y.shape[1])
|
49
|
-
f = np.sum(sc[np.arange(sc.shape[0])[:, None], sidx])
|
50
|
-
df = np.dot(ytopk.sum(1).T, x)
|
51
|
-
return f / k, df / k
|
52
|
-
|
53
|
-
|
54
|
-
def rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, knn=10):
|
55
|
-
X_trans = np.dot(X_src, R.T)
|
56
|
-
f = 2 * np.sum(X_trans * Y_tgt)
|
57
|
-
df = 2 * np.dot(Y_tgt.T, X_src)
|
58
|
-
fk0, dfk0 = getknn(np.dot(X_trans, Z_tgt.T), X_src, Z_tgt, knn)
|
59
|
-
fk1, dfk1 = getknn(np.dot(np.dot(Z_src, R.T), Y_tgt.T).T, Y_tgt, Z_src, knn)
|
60
|
-
f = f - fk0 -fk1
|
61
|
-
df = df - dfk0 - dfk1.T
|
62
|
-
return -f / X_src.shape[0], -df / X_src.shape[0]
|
63
|
-
|
64
|
-
|
65
|
-
def proj_spectral(R):
|
66
|
-
U, s, V = np.linalg.svd(R)
|
67
|
-
s[s > 1] = 1
|
68
|
-
s[s < 0] = 0
|
69
|
-
return np.dot(U, np.dot(np.diag(s), V))
|
70
|
-
|
71
|
-
|
72
|
-
###### MAIN ######
|
73
|
-
|
74
|
-
# load word embeddings
|
75
|
-
words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
|
76
|
-
words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
|
77
|
-
|
78
|
-
# load validation bilingual lexicon
|
79
|
-
src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
|
80
|
-
|
81
|
-
# word --> vector indices
|
82
|
-
idx_src = idx(words_src)
|
83
|
-
idx_tgt = idx(words_tgt)
|
84
|
-
|
85
|
-
# load train bilingual lexicon
|
86
|
-
pairs = load_pairs(params.dico_train, idx_src, idx_tgt)
|
87
|
-
if params.maxsup > 0 and params.maxsup < len(pairs):
|
88
|
-
pairs = pairs[:params.maxsup]
|
89
|
-
|
90
|
-
# selecting training vector pairs
|
91
|
-
X_src, Y_tgt = select_vectors_from_pairs(x_src, x_tgt, pairs)
|
92
|
-
|
93
|
-
# adding negatives for RCSLS
|
94
|
-
Z_src = x_src[:params.maxneg, :]
|
95
|
-
Z_tgt = x_tgt[:params.maxneg, :]
|
96
|
-
|
97
|
-
# initialization:
|
98
|
-
R = procrustes(X_src, Y_tgt)
|
99
|
-
nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
|
100
|
-
print("[init -- Procrustes] NN: %.4f"%(nnacc))
|
101
|
-
sys.stdout.flush()
|
102
|
-
|
103
|
-
# optimization
|
104
|
-
fold, Rold = 0, []
|
105
|
-
niter, lr = params.niter, params.lr
|
106
|
-
|
107
|
-
for it in range(0, niter + 1):
|
108
|
-
if lr < 1e-4:
|
109
|
-
break
|
110
|
-
|
111
|
-
if params.sgd:
|
112
|
-
indices = np.random.choice(X_src.shape[0], size=params.batchsize, replace=False)
|
113
|
-
f, df = rcsls(X_src[indices, :], Y_tgt[indices, :], Z_src, Z_tgt, R, params.knn)
|
114
|
-
else:
|
115
|
-
f, df = rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, params.knn)
|
116
|
-
|
117
|
-
if params.reg > 0:
|
118
|
-
R *= (1 - lr * params.reg)
|
119
|
-
R -= lr * df
|
120
|
-
if params.model == "spectral":
|
121
|
-
R = proj_spectral(R)
|
122
|
-
|
123
|
-
print("[it=%d] f = %.4f" % (it, f))
|
124
|
-
sys.stdout.flush()
|
125
|
-
|
126
|
-
if f > fold and it > 0 and not params.sgd:
|
127
|
-
lr /= 2
|
128
|
-
f, R = fold, Rold
|
129
|
-
|
130
|
-
fold, Rold = f, R
|
131
|
-
|
132
|
-
if (it > 0 and it % 10 == 0) or it == niter:
|
133
|
-
nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
|
134
|
-
print("[it=%d] NN = %.4f - Coverage = %.4f" % (it, nnacc, len(src2tgt) / lexicon_size))
|
135
|
-
|
136
|
-
nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
|
137
|
-
print("[final] NN = %.4f - Coverage = %.4f" % (nnacc, len(src2tgt) / lexicon_size))
|
138
|
-
|
139
|
-
if params.output != "":
|
140
|
-
print("Saving all aligned vectors at %s" % params.output)
|
141
|
-
words_full, x_full = load_vectors(params.src_emb, maxload=-1, center=params.center, verbose=False)
|
142
|
-
x = np.dot(x_full, R.T)
|
143
|
-
x /= np.linalg.norm(x, axis=1)[:, np.newaxis] + 1e-8
|
144
|
-
save_vectors(params.output, x, words_full)
|
145
|
-
save_matrix(params.output + "-mat", R)
|
@@ -1,60 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python3
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
#
|
4
|
-
# Copyright (c) 2018-present, Facebook, Inc.
|
5
|
-
# All rights reserved.
|
6
|
-
#
|
7
|
-
# This source code is licensed under the license found in the
|
8
|
-
# LICENSE file in the root directory of this source tree.
|
9
|
-
|
10
|
-
import io
|
11
|
-
import numpy as np
|
12
|
-
import argparse
|
13
|
-
from utils import *
|
14
|
-
|
15
|
-
parser = argparse.ArgumentParser(description='Evaluation of word alignment')
|
16
|
-
parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
|
17
|
-
parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
|
18
|
-
parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
|
19
|
-
parser.add_argument("--src_mat", type=str, default='', help="Load source alignment matrix. If none given, the aligment matrix is the identity.")
|
20
|
-
parser.add_argument("--tgt_mat", type=str, default='', help="Load target alignment matrix. If none given, the aligment matrix is the identity.")
|
21
|
-
parser.add_argument("--dico_test", type=str, default='', help="test dictionary")
|
22
|
-
parser.add_argument("--maxload", type=int, default=200000)
|
23
|
-
parser.add_argument("--nomatch", action='store_true', help="no exact match in lexicon")
|
24
|
-
params = parser.parse_args()
|
25
|
-
|
26
|
-
|
27
|
-
###### SPECIFIC FUNCTIONS ######
|
28
|
-
# function specific to evaluation
|
29
|
-
# the rest of the functions are in utils.py
|
30
|
-
|
31
|
-
def load_transform(fname, d1=300, d2=300):
|
32
|
-
fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
|
33
|
-
R = np.zeros([d1, d2])
|
34
|
-
for i, line in enumerate(fin):
|
35
|
-
tokens = line.split(' ')
|
36
|
-
R[i, :] = np.array(tokens[0:d2], dtype=float)
|
37
|
-
return R
|
38
|
-
|
39
|
-
|
40
|
-
###### MAIN ######
|
41
|
-
|
42
|
-
print("Evaluation of alignment on %s" % params.dico_test)
|
43
|
-
if params.nomatch:
|
44
|
-
print("running without exact string matches")
|
45
|
-
|
46
|
-
words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
|
47
|
-
words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
|
48
|
-
|
49
|
-
if params.tgt_mat != "":
|
50
|
-
R_tgt = load_transform(params.tgt_mat)
|
51
|
-
x_tgt = np.dot(x_tgt, R_tgt)
|
52
|
-
if params.src_mat != "":
|
53
|
-
R_src = load_transform(params.src_mat)
|
54
|
-
x_src = np.dot(x_src, R_src)
|
55
|
-
|
56
|
-
src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
|
57
|
-
|
58
|
-
nnacc = compute_nn_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
|
59
|
-
cslsproc = compute_csls_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
|
60
|
-
print("NN = %.4f - CSLS = %.4f - Coverage = %.4f" % (nnacc, cslsproc, len(src2tgt) / lexicon_size))
|
@@ -1,51 +0,0 @@
|
|
1
|
-
#!/bin/usr/env sh
|
2
|
-
# Copyright (c) 2018-present, Facebook, Inc.
|
3
|
-
# All rights reserved.
|
4
|
-
#
|
5
|
-
# This source code is licensed under the license found in the
|
6
|
-
# LICENSE file in the root directory of this source tree.
|
7
|
-
|
8
|
-
set -e
|
9
|
-
s=${1:-en}
|
10
|
-
t=${2:-es}
|
11
|
-
echo "Example based on the ${s}->${t} alignment"
|
12
|
-
|
13
|
-
if [ ! -d data/ ]; then
|
14
|
-
mkdir -p data;
|
15
|
-
fi
|
16
|
-
|
17
|
-
if [ ! -d res/ ]; then
|
18
|
-
mkdir -p res;
|
19
|
-
fi
|
20
|
-
|
21
|
-
dico_train=data/${s}-${t}.0-5000.txt
|
22
|
-
if [ ! -f "${dico_train}" ]; then
|
23
|
-
DICO=$(basename -- "${dico_train}")
|
24
|
-
wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
|
25
|
-
fi
|
26
|
-
|
27
|
-
dico_test=data/${s}-${t}.5000-6500.txt
|
28
|
-
if [ ! -f "${dico_test}" ]; then
|
29
|
-
DICO=$(basename -- "${dico_test}")
|
30
|
-
wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
|
31
|
-
fi
|
32
|
-
|
33
|
-
src_emb=data/wiki.${s}.vec
|
34
|
-
if [ ! -f "${src_emb}" ]; then
|
35
|
-
EMB=$(basename -- "${src_emb}")
|
36
|
-
wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
|
37
|
-
fi
|
38
|
-
|
39
|
-
tgt_emb=data/wiki.${t}.vec
|
40
|
-
if [ ! -f "${tgt_emb}" ]; then
|
41
|
-
EMB=$(basename -- "${tgt_emb}")
|
42
|
-
wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
|
43
|
-
fi
|
44
|
-
|
45
|
-
output=res/wiki.${s}-${t}.vec
|
46
|
-
|
47
|
-
python3 align.py --src_emb "${src_emb}" --tgt_emb "${tgt_emb}" \
|
48
|
-
--dico_train "${dico_train}" --dico_test "${dico_test}" --output "${output}" \
|
49
|
-
--lr 25 --niter 10
|
50
|
-
python3 eval.py --src_emb "${output}" --tgt_emb "${tgt_emb}" \
|
51
|
-
--dico_test "${dico_test}"
|