fasttext 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +20 -1
- data/lib/fasttext.rb +3 -0
- data/lib/fasttext/classifier.rb +12 -4
- data/lib/fasttext/vectorizer.rb +1 -1
- data/lib/fasttext/version.rb +1 -1
- metadata +4 -473
- data/lib/fasttext/ext.bundle +0 -0
- data/vendor/fastText/CMakeLists.txt +0 -68
- data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
- data/vendor/fastText/CONTRIBUTING.md +0 -32
- data/vendor/fastText/MANIFEST.in +0 -5
- data/vendor/fastText/Makefile +0 -63
- data/vendor/fastText/alignment/README.md +0 -53
- data/vendor/fastText/alignment/align.py +0 -145
- data/vendor/fastText/alignment/eval.py +0 -60
- data/vendor/fastText/alignment/example.sh +0 -51
- data/vendor/fastText/alignment/unsup_align.py +0 -109
- data/vendor/fastText/alignment/utils.py +0 -154
- data/vendor/fastText/classification-example.sh +0 -41
- data/vendor/fastText/classification-results.sh +0 -94
- data/vendor/fastText/crawl/README.md +0 -26
- data/vendor/fastText/crawl/dedup.cc +0 -51
- data/vendor/fastText/crawl/download_crawl.sh +0 -57
- data/vendor/fastText/crawl/filter_dedup.sh +0 -13
- data/vendor/fastText/crawl/filter_utf8.cc +0 -105
- data/vendor/fastText/crawl/process_wet_file.sh +0 -30
- data/vendor/fastText/docs/aligned-vectors.md +0 -64
- data/vendor/fastText/docs/api.md +0 -6
- data/vendor/fastText/docs/cheatsheet.md +0 -66
- data/vendor/fastText/docs/crawl-vectors.md +0 -125
- data/vendor/fastText/docs/dataset.md +0 -6
- data/vendor/fastText/docs/english-vectors.md +0 -53
- data/vendor/fastText/docs/faqs.md +0 -63
- data/vendor/fastText/docs/language-identification.md +0 -47
- data/vendor/fastText/docs/options.md +0 -50
- data/vendor/fastText/docs/pretrained-vectors.md +0 -142
- data/vendor/fastText/docs/python-module.md +0 -314
- data/vendor/fastText/docs/references.md +0 -41
- data/vendor/fastText/docs/supervised-models.md +0 -54
- data/vendor/fastText/docs/supervised-tutorial.md +0 -349
- data/vendor/fastText/docs/support.md +0 -58
- data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
- data/vendor/fastText/eval.py +0 -95
- data/vendor/fastText/get-wikimedia.sh +0 -79
- data/vendor/fastText/python/README.md +0 -322
- data/vendor/fastText/python/README.rst +0 -406
- data/vendor/fastText/python/benchmarks/README.rst +0 -3
- data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
- data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
- data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
- data/vendor/fastText/quantization-example.sh +0 -40
- data/vendor/fastText/runtests.py +0 -60
- data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
- data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
- data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
- data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
- data/vendor/fastText/setup.cfg +0 -2
- data/vendor/fastText/setup.py +0 -203
- data/vendor/fastText/tests/fetch_test_data.sh +0 -202
- data/vendor/fastText/website/README.md +0 -6
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
- data/vendor/fastText/website/core/Footer.js +0 -127
- data/vendor/fastText/website/package.json +0 -12
- data/vendor/fastText/website/pages/en/index.js +0 -286
- data/vendor/fastText/website/sidebars.json +0 -18
- data/vendor/fastText/website/siteConfig.js +0 -102
- data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
- data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
- data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
- data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
- data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
- data/vendor/fastText/website/static/fasttext.css +0 -48
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +0 -57
- data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,38 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# copyright (c) 2017-present, facebook, inc.
|
4
|
-
# all rights reserved.
|
5
|
-
#
|
6
|
-
# this source code is licensed under the MIT license found in the
|
7
|
-
# license file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
# script for SVO
|
10
|
-
DIR=data/SVO-tensor-dataset
|
11
|
-
FASTTEXTDIR=../../
|
12
|
-
|
13
|
-
# compile
|
14
|
-
pushd $FASTTEXTDIR
|
15
|
-
make opt
|
16
|
-
popd
|
17
|
-
ft=${FASTTEXTDIR}/fasttext
|
18
|
-
|
19
|
-
## Train model and test it on validation:
|
20
|
-
|
21
|
-
dim=200
|
22
|
-
epoch=3
|
23
|
-
model=svo
|
24
|
-
|
25
|
-
echo "---- train ----"
|
26
|
-
time $ft supervised -input ${DIR}/ft_svo_data_train_1000000.dat \
|
27
|
-
-dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
|
28
|
-
|
29
|
-
echo "computing raw hit@5%..."
|
30
|
-
$ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
|
31
|
-
|
32
|
-
|
33
|
-
echo "---- train + valid ----"
|
34
|
-
time $ft supervised -input ${DIR}/ft_svo_data-valid+train.dat \
|
35
|
-
-dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
|
36
|
-
|
37
|
-
echo "computing raw hit@5%..."
|
38
|
-
$ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
|
@@ -1,49 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# copyright (c) 2017-present, facebook, inc.
|
4
|
-
# all rights reserved.
|
5
|
-
#
|
6
|
-
# this source code is licensed under the MIT license found in the
|
7
|
-
# license file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
# script for WN11
|
10
|
-
DIR=data/wordnet-mlj12/
|
11
|
-
FASTTEXTDIR=../../
|
12
|
-
|
13
|
-
# compile
|
14
|
-
|
15
|
-
pushd $FASTTEXTDIR
|
16
|
-
make opt
|
17
|
-
popd
|
18
|
-
ft=${FASTTEXTDIR}/fasttext
|
19
|
-
|
20
|
-
g++ -std=c++0x eval.cpp -o eval
|
21
|
-
|
22
|
-
# Train model and test it:
|
23
|
-
dim=100
|
24
|
-
epoch=100
|
25
|
-
neg=500
|
26
|
-
model=data/wn
|
27
|
-
pred=data/wnpred
|
28
|
-
|
29
|
-
echo "---- train ----"
|
30
|
-
$ft supervised -input ${DIR}/ft_wordnet-mlj12-train.txt \
|
31
|
-
-dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
|
32
|
-
|
33
|
-
echo "computing raw hits@10..."
|
34
|
-
$ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
|
35
|
-
|
36
|
-
echo "computing filtered hit@10..."
|
37
|
-
$ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
|
38
|
-
./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
|
39
|
-
|
40
|
-
echo "---- train+val ----"
|
41
|
-
$ft supervised -input ${DIR}/ft_wordnet-mlj12-valid+train.txt \
|
42
|
-
-dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
|
43
|
-
|
44
|
-
echo "computing raw hits@10..."
|
45
|
-
$ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
|
46
|
-
|
47
|
-
echo "computing filtered hit@10..."
|
48
|
-
$ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
|
49
|
-
./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
|
@@ -1,43 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# Copyright (c) 2016-present, Facebook, Inc.
|
4
|
-
# All rights reserved.
|
5
|
-
#
|
6
|
-
# This source code is licensed under the MIT license found in the
|
7
|
-
# LICENSE file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
|
10
|
-
# This script applies quantization to the models from Table 1 in:
|
11
|
-
# Bag of Tricks for Efficient Text Classification, arXiv 1607.01759, 2016
|
12
|
-
|
13
|
-
set -e
|
14
|
-
|
15
|
-
DATASET=(
|
16
|
-
ag_news
|
17
|
-
sogou_news
|
18
|
-
dbpedia
|
19
|
-
yelp_review_polarity
|
20
|
-
yelp_review_full
|
21
|
-
yahoo_answers
|
22
|
-
amazon_review_full
|
23
|
-
amazon_review_polarity
|
24
|
-
)
|
25
|
-
|
26
|
-
# These learning rates were chosen by validation on a subset of the training set.
|
27
|
-
LR=( 0.25 0.5 0.5 0.1 0.1 0.1 0.05 0.05 )
|
28
|
-
|
29
|
-
RESULTDIR=result
|
30
|
-
DATADIR=data
|
31
|
-
|
32
|
-
echo 'Warning! Make sure you run the classification-results.sh script before this one'
|
33
|
-
echo 'Otherwise you can expect the commands in this script to fail'
|
34
|
-
|
35
|
-
for i in {0..7}
|
36
|
-
do
|
37
|
-
echo "Working on dataset ${DATASET[i]}"
|
38
|
-
../../fasttext quantize -input "${DATADIR}/${DATASET[i]}.train" \
|
39
|
-
-output "${RESULTDIR}/${DATASET[i]}" -lr "${LR[i]}" \
|
40
|
-
-thread 4 -qnorm -retrain -epoch 5 -cutoff 100000 > /dev/null
|
41
|
-
../../fasttext test "${RESULTDIR}/${DATASET[i]}.ftz" \
|
42
|
-
"${DATADIR}/${DATASET[i]}.test"
|
43
|
-
done
|
data/vendor/fastText/setup.cfg
DELETED
data/vendor/fastText/setup.py
DELETED
@@ -1,203 +0,0 @@
|
|
1
|
-
#!/usr/bin/env python
|
2
|
-
|
3
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
4
|
-
# All rights reserved.
|
5
|
-
#
|
6
|
-
# This source code is licensed under the MIT license found in the
|
7
|
-
# LICENSE file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
|
10
|
-
from __future__ import absolute_import
|
11
|
-
from __future__ import division
|
12
|
-
from __future__ import print_function
|
13
|
-
from __future__ import unicode_literals
|
14
|
-
|
15
|
-
from setuptools import setup, Extension
|
16
|
-
from setuptools.command.build_ext import build_ext
|
17
|
-
import sys
|
18
|
-
import setuptools
|
19
|
-
import os
|
20
|
-
import subprocess
|
21
|
-
import platform
|
22
|
-
import io
|
23
|
-
|
24
|
-
__version__ = '0.9.1'
|
25
|
-
FASTTEXT_SRC = "src"
|
26
|
-
|
27
|
-
# Based on https://github.com/pybind/python_example
|
28
|
-
|
29
|
-
class get_pybind_include(object):
|
30
|
-
"""Helper class to determine the pybind11 include path
|
31
|
-
|
32
|
-
The purpose of this class is to postpone importing pybind11
|
33
|
-
until it is actually installed, so that the ``get_include()``
|
34
|
-
method can be invoked. """
|
35
|
-
|
36
|
-
def __init__(self, user=False):
|
37
|
-
try:
|
38
|
-
import pybind11
|
39
|
-
except ImportError:
|
40
|
-
if subprocess.call([sys.executable, '-m', 'pip', 'install', 'pybind11']):
|
41
|
-
raise RuntimeError('pybind11 install failed.')
|
42
|
-
|
43
|
-
self.user = user
|
44
|
-
|
45
|
-
def __str__(self):
|
46
|
-
import pybind11
|
47
|
-
return pybind11.get_include(self.user)
|
48
|
-
|
49
|
-
try:
|
50
|
-
coverage_index = sys.argv.index('--coverage')
|
51
|
-
except ValueError:
|
52
|
-
coverage = False
|
53
|
-
else:
|
54
|
-
del sys.argv[coverage_index]
|
55
|
-
coverage = True
|
56
|
-
|
57
|
-
fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
|
58
|
-
fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))
|
59
|
-
|
60
|
-
fasttext_src_cc = list(
|
61
|
-
map(lambda x: str(os.path.join(FASTTEXT_SRC, x)), fasttext_src_cc)
|
62
|
-
)
|
63
|
-
|
64
|
-
ext_modules = [
|
65
|
-
Extension(
|
66
|
-
str('fasttext_pybind'),
|
67
|
-
[
|
68
|
-
str('python/fasttext_module/fasttext/pybind/fasttext_pybind.cc'),
|
69
|
-
] + fasttext_src_cc,
|
70
|
-
include_dirs=[
|
71
|
-
# Path to pybind11 headers
|
72
|
-
get_pybind_include(),
|
73
|
-
get_pybind_include(user=True),
|
74
|
-
# Path to fasttext source code
|
75
|
-
FASTTEXT_SRC,
|
76
|
-
],
|
77
|
-
language='c++',
|
78
|
-
extra_compile_args=["-O0 -fno-inline -fprofile-arcs -pthread -march=native" if coverage else
|
79
|
-
"-O3 -funroll-loops -pthread -march=native"],
|
80
|
-
),
|
81
|
-
]
|
82
|
-
|
83
|
-
|
84
|
-
# As of Python 3.6, CCompiler has a `has_flag` method.
|
85
|
-
# cf http://bugs.python.org/issue26689
|
86
|
-
def has_flag(compiler, flags):
|
87
|
-
"""Return a boolean indicating whether a flag name is supported on
|
88
|
-
the specified compiler.
|
89
|
-
"""
|
90
|
-
import tempfile
|
91
|
-
with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
|
92
|
-
f.write('int main (int argc, char **argv) { return 0; }')
|
93
|
-
try:
|
94
|
-
compiler.compile([f.name], extra_postargs=flags)
|
95
|
-
except setuptools.distutils.errors.CompileError:
|
96
|
-
return False
|
97
|
-
return True
|
98
|
-
|
99
|
-
|
100
|
-
def cpp_flag(compiler):
|
101
|
-
"""Return the -std=c++[0x/11/14] compiler flag.
|
102
|
-
The c++14 is preferred over c++0x/11 (when it is available).
|
103
|
-
"""
|
104
|
-
standards = ['-std=c++14', '-std=c++11', '-std=c++0x']
|
105
|
-
for standard in standards:
|
106
|
-
if has_flag(compiler, [standard]):
|
107
|
-
return standard
|
108
|
-
raise RuntimeError(
|
109
|
-
'Unsupported compiler -- at least C++0x support '
|
110
|
-
'is needed!'
|
111
|
-
)
|
112
|
-
|
113
|
-
|
114
|
-
class BuildExt(build_ext):
|
115
|
-
"""A custom build extension for adding compiler-specific options."""
|
116
|
-
c_opts = {
|
117
|
-
'msvc': ['/EHsc'],
|
118
|
-
'unix': [],
|
119
|
-
}
|
120
|
-
|
121
|
-
def build_extensions(self):
|
122
|
-
if sys.platform == 'darwin':
|
123
|
-
mac_osx_version = float('.'.join(platform.mac_ver()[0].split('.')[:2]))
|
124
|
-
os.environ['MACOSX_DEPLOYMENT_TARGET'] = str(mac_osx_version)
|
125
|
-
all_flags = ['-stdlib=libc++', '-mmacosx-version-min=10.7']
|
126
|
-
if has_flag(self.compiler, [all_flags[0]]):
|
127
|
-
self.c_opts['unix'] += [all_flags[0]]
|
128
|
-
elif has_flag(self.compiler, all_flags):
|
129
|
-
self.c_opts['unix'] += all_flags
|
130
|
-
else:
|
131
|
-
raise RuntimeError(
|
132
|
-
'libc++ is needed! Failed to compile with {} and {}.'.
|
133
|
-
format(" ".join(all_flags), all_flags[0])
|
134
|
-
)
|
135
|
-
ct = self.compiler.compiler_type
|
136
|
-
opts = self.c_opts.get(ct, [])
|
137
|
-
extra_link_args = []
|
138
|
-
|
139
|
-
if coverage:
|
140
|
-
coverage_option = '--coverage'
|
141
|
-
opts.append(coverage_option)
|
142
|
-
extra_link_args.append(coverage_option)
|
143
|
-
|
144
|
-
if ct == 'unix':
|
145
|
-
opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
|
146
|
-
opts.append(cpp_flag(self.compiler))
|
147
|
-
if has_flag(self.compiler, ['-fvisibility=hidden']):
|
148
|
-
opts.append('-fvisibility=hidden')
|
149
|
-
elif ct == 'msvc':
|
150
|
-
opts.append(
|
151
|
-
'/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
|
152
|
-
)
|
153
|
-
for ext in self.extensions:
|
154
|
-
ext.extra_compile_args = opts
|
155
|
-
ext.extra_link_args = extra_link_args
|
156
|
-
build_ext.build_extensions(self)
|
157
|
-
|
158
|
-
|
159
|
-
def _get_readme():
|
160
|
-
"""
|
161
|
-
Use pandoc to generate rst from md.
|
162
|
-
pandoc --from=markdown --to=rst --output=python/README.rst python/README.md
|
163
|
-
"""
|
164
|
-
with io.open("python/README.rst", encoding='utf-8') as fid:
|
165
|
-
return fid.read()
|
166
|
-
|
167
|
-
|
168
|
-
setup(
|
169
|
-
name='fasttext',
|
170
|
-
version=__version__,
|
171
|
-
author='Onur Celebi',
|
172
|
-
author_email='celebio@fb.com',
|
173
|
-
description='fasttext Python bindings',
|
174
|
-
long_description=_get_readme(),
|
175
|
-
ext_modules=ext_modules,
|
176
|
-
url='https://github.com/facebookresearch/fastText',
|
177
|
-
license='MIT',
|
178
|
-
classifiers=[
|
179
|
-
'Development Status :: 3 - Alpha',
|
180
|
-
'Intended Audience :: Developers',
|
181
|
-
'Intended Audience :: Science/Research',
|
182
|
-
'License :: OSI Approved :: MIT License',
|
183
|
-
'Programming Language :: Python :: 2.7',
|
184
|
-
'Programming Language :: Python :: 3.4',
|
185
|
-
'Programming Language :: Python :: 3.5',
|
186
|
-
'Programming Language :: Python :: 3.6',
|
187
|
-
'Topic :: Software Development',
|
188
|
-
'Topic :: Scientific/Engineering',
|
189
|
-
'Operating System :: Microsoft :: Windows',
|
190
|
-
'Operating System :: POSIX',
|
191
|
-
'Operating System :: Unix',
|
192
|
-
'Operating System :: MacOS',
|
193
|
-
],
|
194
|
-
install_requires=['pybind11>=2.2', "setuptools >= 0.7.0", "numpy"],
|
195
|
-
cmdclass={'build_ext': BuildExt},
|
196
|
-
packages=[
|
197
|
-
str('fasttext'),
|
198
|
-
str('fasttext.util'),
|
199
|
-
str('fasttext.tests'),
|
200
|
-
],
|
201
|
-
package_dir={str(''): str('python/fasttext_module')},
|
202
|
-
zip_safe=False,
|
203
|
-
)
|
@@ -1,202 +0,0 @@
|
|
1
|
-
#!/usr/bin/env bash
|
2
|
-
#
|
3
|
-
# Copyright (c) 2016-present, Facebook, Inc.
|
4
|
-
# All rights reserved.
|
5
|
-
#
|
6
|
-
# This source code is licensed under the MIT license found in the
|
7
|
-
# LICENSE file in the root directory of this source tree.
|
8
|
-
#
|
9
|
-
|
10
|
-
DATADIR=${DATADIR:-data}
|
11
|
-
|
12
|
-
report_error() {
|
13
|
-
echo "Error on line $1 of $0"
|
14
|
-
}
|
15
|
-
|
16
|
-
myshuf() {
|
17
|
-
perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
|
18
|
-
}
|
19
|
-
|
20
|
-
normalize_text() {
|
21
|
-
tr '[:upper:]' '[:lower:]' | sed -e 's/^/__label__/g' | \
|
22
|
-
sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
|
23
|
-
-e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
|
24
|
-
-e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf
|
25
|
-
}
|
26
|
-
|
27
|
-
set -e
|
28
|
-
trap 'report_error $LINENO' ERR
|
29
|
-
|
30
|
-
mkdir -p "${DATADIR}"
|
31
|
-
|
32
|
-
|
33
|
-
# Unsupervised datasets
|
34
|
-
|
35
|
-
data_result="${DATADIR}/rw_queries.txt"
|
36
|
-
if [ ! -f "$data_result" ]
|
37
|
-
then
|
38
|
-
cut -f 1,2 "${DATADIR}"/rw/rw.txt | awk '{print tolower($0)}' | tr '\t' '\n' > "$data_result" || rm -f "$data_result"
|
39
|
-
fi
|
40
|
-
|
41
|
-
data_result="${DATADIR}/enwik9.zip"
|
42
|
-
if [ ! -f "$data_result" ] || \
|
43
|
-
[ $(md5sum "$data_result" | cut -f 1 -d ' ') != "3e773f8a1577fda2e27f871ca17f31fd" ]
|
44
|
-
then
|
45
|
-
wget -c http://mattmahoney.net/dc/enwik9.zip -P "${DATADIR}" || rm -f "$data_result"
|
46
|
-
unzip "$data_result" -d "${DATADIR}" || rm -f "$data_result"
|
47
|
-
fi
|
48
|
-
|
49
|
-
data_result="${DATADIR}/fil9"
|
50
|
-
if [ ! -f "$data_result" ]
|
51
|
-
then
|
52
|
-
perl wikifil.pl "${DATADIR}/enwik9" > "$data_result" || rm -f "$data_result"
|
53
|
-
fi
|
54
|
-
|
55
|
-
data_result="${DATADIR}/rw/rw.txt"
|
56
|
-
if [ ! -f "$data_result" ]
|
57
|
-
then
|
58
|
-
wget -c https://nlp.stanford.edu/~lmthang/morphoNLM/rw.zip -P "${DATADIR}"
|
59
|
-
unzip "${DATADIR}/rw.zip" -d "${DATADIR}" || rm -f "$data_result"
|
60
|
-
fi
|
61
|
-
|
62
|
-
# Supervised datasets
|
63
|
-
# Each datasets comes with a .train and a .test to measure performance
|
64
|
-
|
65
|
-
echo "Downloading dataset dbpedia"
|
66
|
-
|
67
|
-
data_result="${DATADIR}/dbpedia_csv.tar.gz"
|
68
|
-
if [ ! -f "$data_result" ] || \
|
69
|
-
[ $(md5sum "$data_result" | cut -f 1 -d ' ') != "8139d58cf075c7f70d085358e73af9b3" ]
|
70
|
-
then
|
71
|
-
wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "$data_result"
|
72
|
-
tar -xzvf "$data_result" -C "${DATADIR}"
|
73
|
-
fi
|
74
|
-
|
75
|
-
data_result="${DATADIR}/dbpedia.train"
|
76
|
-
if [ ! -f "$data_result" ]
|
77
|
-
then
|
78
|
-
cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "$data_result" || rm -f "$data_result"
|
79
|
-
fi
|
80
|
-
|
81
|
-
data_result="${DATADIR}/dbpedia.test"
|
82
|
-
if [ ! -f "$data_result" ]
|
83
|
-
then
|
84
|
-
cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "$data_result" || rm -f "$data_result"
|
85
|
-
fi
|
86
|
-
|
87
|
-
echo "Downloading dataset tatoeba for langid"
|
88
|
-
|
89
|
-
data_result="${DATADIR}"/langid/all.txt
|
90
|
-
if [ ! -f "$data_result" ]
|
91
|
-
then
|
92
|
-
mkdir -p "${DATADIR}"/langid
|
93
|
-
wget http://downloads.tatoeba.org/exports/sentences.tar.bz2 -O "${DATADIR}"/langid/sentences.tar.bz2
|
94
|
-
tar xvfj "${DATADIR}"/langid/sentences.tar.bz2 --directory "${DATADIR}"/langid || exit 1
|
95
|
-
awk -F"\t" '{print"__label__"$2" "$3}' < "${DATADIR}"/langid/sentences.csv | shuf > "$data_result"
|
96
|
-
fi
|
97
|
-
|
98
|
-
data_result="${DATADIR}/langid.train"
|
99
|
-
if [ ! -f "$data_result" ]
|
100
|
-
then
|
101
|
-
tail -n +10001 "${DATADIR}"/langid/all.txt > "$data_result"
|
102
|
-
fi
|
103
|
-
|
104
|
-
data_result="${DATADIR}/langid.valid"
|
105
|
-
if [ ! -f "$data_result" ]
|
106
|
-
then
|
107
|
-
head -n 10000 "${DATADIR}"/langid/all.txt > "$data_result"
|
108
|
-
fi
|
109
|
-
|
110
|
-
echo "Downloading cooking dataset"
|
111
|
-
|
112
|
-
data_result="${DATADIR}"/cooking/cooking.stackexchange.txt
|
113
|
-
if [ ! -f "$data_result" ]
|
114
|
-
then
|
115
|
-
mkdir -p "${DATADIR}"/cooking/
|
116
|
-
wget https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz -O "${DATADIR}"/cooking/cooking.stackexchange.tar.gz
|
117
|
-
tar xvzf "${DATADIR}"/cooking/cooking.stackexchange.tar.gz --directory "${DATADIR}"/cooking || exit 1
|
118
|
-
cat "${DATADIR}"/cooking/cooking.stackexchange.txt | sed -e "s/\([.\!?,'/()]\)/ \1 /g" | tr "[:upper:]" "[:lower:]" > "${DATADIR}"/cooking/cooking.preprocessed.txt
|
119
|
-
fi
|
120
|
-
|
121
|
-
data_result="${DATADIR}"/cooking.train
|
122
|
-
if [ ! -f "$data_result" ]
|
123
|
-
then
|
124
|
-
head -n 12404 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.train
|
125
|
-
fi
|
126
|
-
|
127
|
-
data_result="${DATADIR}"/cooking.valid
|
128
|
-
if [ ! -f "$data_result" ]
|
129
|
-
then
|
130
|
-
tail -n 3000 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.valid
|
131
|
-
fi
|
132
|
-
|
133
|
-
echo "Checking for YFCC100M"
|
134
|
-
|
135
|
-
data_result="${DATADIR}"/YFCC100M/train
|
136
|
-
if [ ! -f "$data_result" ]
|
137
|
-
then
|
138
|
-
echo 'Download YFCC100M, unpack it and place train into the following path: '"$data_result"
|
139
|
-
echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
|
140
|
-
echo 'After you download this, run the script again'
|
141
|
-
exit 1
|
142
|
-
fi
|
143
|
-
|
144
|
-
data_result="${DATADIR}"/YFCC100M/test
|
145
|
-
if [ ! -f "$data_result" ]
|
146
|
-
then
|
147
|
-
echo 'Download YFCC100M, unpack it and place test into the following path: '"$data_result"
|
148
|
-
echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
|
149
|
-
echo 'After you download this, run the script again'
|
150
|
-
exit 1
|
151
|
-
fi
|
152
|
-
|
153
|
-
DATASET=(
|
154
|
-
ag_news
|
155
|
-
sogou_news
|
156
|
-
dbpedia
|
157
|
-
yelp_review_polarity
|
158
|
-
yelp_review_full
|
159
|
-
yahoo_answers
|
160
|
-
amazon_review_full
|
161
|
-
amazon_review_polarity
|
162
|
-
)
|
163
|
-
|
164
|
-
ID=(
|
165
|
-
0Bz8a_Dbh9QhbUDNpeUdjb0wxRms # ag_news
|
166
|
-
0Bz8a_Dbh9QhbUkVqNEszd0pHaFE # sogou_news
|
167
|
-
0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k # dbpedia
|
168
|
-
0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg # yelp_review_polarity
|
169
|
-
0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0 # yelp_review_full
|
170
|
-
0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU # yahoo_answers
|
171
|
-
0Bz8a_Dbh9QhbZVhsUnRWRDhETzA # amazon_review_full
|
172
|
-
0Bz8a_Dbh9QhbaW12WVVZS2drcnM # amazon_review_polarity
|
173
|
-
)
|
174
|
-
|
175
|
-
# Small datasets first
|
176
|
-
|
177
|
-
for i in {0..0}
|
178
|
-
do
|
179
|
-
echo "Downloading dataset ${DATASET[i]}"
|
180
|
-
if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
|
181
|
-
then
|
182
|
-
wget -c "https://drive.google.com/uc?export=download&id=${ID[i]}" -O "${DATADIR}/${DATASET[i]}_csv.tar.gz"
|
183
|
-
tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
|
184
|
-
cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
|
185
|
-
cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
|
186
|
-
fi
|
187
|
-
done
|
188
|
-
|
189
|
-
# Large datasets require a bit more work due to the extra request page
|
190
|
-
|
191
|
-
for i in {1..7}
|
192
|
-
do
|
193
|
-
echo "Downloading dataset ${DATASET[i]}"
|
194
|
-
if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
|
195
|
-
then
|
196
|
-
curl -c /tmp/cookies "https://drive.google.com/uc?export=download&id=${ID[i]}" > /tmp/intermezzo.html
|
197
|
-
curl -L -b /tmp/cookies "https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link" [^>]* href="\K[^"]*' | sed 's/\&amp;/\&/g')" > "${DATADIR}/${DATASET[i]}_csv.tar.gz"
|
198
|
-
tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
|
199
|
-
cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
|
200
|
-
cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
|
201
|
-
fi
|
202
|
-
done
|