fasttext 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +6 -0
- data/README.md +20 -1
- data/lib/fasttext.rb +3 -0
- data/lib/fasttext/classifier.rb +12 -4
- data/lib/fasttext/vectorizer.rb +1 -1
- data/lib/fasttext/version.rb +1 -1
- metadata +4 -473
- data/lib/fasttext/ext.bundle +0 -0
- data/vendor/fastText/CMakeLists.txt +0 -68
- data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
- data/vendor/fastText/CONTRIBUTING.md +0 -32
- data/vendor/fastText/MANIFEST.in +0 -5
- data/vendor/fastText/Makefile +0 -63
- data/vendor/fastText/alignment/README.md +0 -53
- data/vendor/fastText/alignment/align.py +0 -145
- data/vendor/fastText/alignment/eval.py +0 -60
- data/vendor/fastText/alignment/example.sh +0 -51
- data/vendor/fastText/alignment/unsup_align.py +0 -109
- data/vendor/fastText/alignment/utils.py +0 -154
- data/vendor/fastText/classification-example.sh +0 -41
- data/vendor/fastText/classification-results.sh +0 -94
- data/vendor/fastText/crawl/README.md +0 -26
- data/vendor/fastText/crawl/dedup.cc +0 -51
- data/vendor/fastText/crawl/download_crawl.sh +0 -57
- data/vendor/fastText/crawl/filter_dedup.sh +0 -13
- data/vendor/fastText/crawl/filter_utf8.cc +0 -105
- data/vendor/fastText/crawl/process_wet_file.sh +0 -30
- data/vendor/fastText/docs/aligned-vectors.md +0 -64
- data/vendor/fastText/docs/api.md +0 -6
- data/vendor/fastText/docs/cheatsheet.md +0 -66
- data/vendor/fastText/docs/crawl-vectors.md +0 -125
- data/vendor/fastText/docs/dataset.md +0 -6
- data/vendor/fastText/docs/english-vectors.md +0 -53
- data/vendor/fastText/docs/faqs.md +0 -63
- data/vendor/fastText/docs/language-identification.md +0 -47
- data/vendor/fastText/docs/options.md +0 -50
- data/vendor/fastText/docs/pretrained-vectors.md +0 -142
- data/vendor/fastText/docs/python-module.md +0 -314
- data/vendor/fastText/docs/references.md +0 -41
- data/vendor/fastText/docs/supervised-models.md +0 -54
- data/vendor/fastText/docs/supervised-tutorial.md +0 -349
- data/vendor/fastText/docs/support.md +0 -58
- data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
- data/vendor/fastText/eval.py +0 -95
- data/vendor/fastText/get-wikimedia.sh +0 -79
- data/vendor/fastText/python/README.md +0 -322
- data/vendor/fastText/python/README.rst +0 -406
- data/vendor/fastText/python/benchmarks/README.rst +0 -3
- data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
- data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
- data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
- data/vendor/fastText/quantization-example.sh +0 -40
- data/vendor/fastText/runtests.py +0 -60
- data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
- data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
- data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
- data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
- data/vendor/fastText/setup.cfg +0 -2
- data/vendor/fastText/setup.py +0 -203
- data/vendor/fastText/tests/fetch_test_data.sh +0 -202
- data/vendor/fastText/website/README.md +0 -6
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
- data/vendor/fastText/website/core/Footer.js +0 -127
- data/vendor/fastText/website/package.json +0 -12
- data/vendor/fastText/website/pages/en/index.js +0 -286
- data/vendor/fastText/website/sidebars.json +0 -18
- data/vendor/fastText/website/siteConfig.js +0 -102
- data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
- data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
- data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
- data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
- data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
- data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
- data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
- data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
- data/vendor/fastText/website/static/fasttext.css +0 -48
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +0 -57
- data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,22 +0,0 @@
|
|
1
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the MIT license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
from __future__ import absolute_import
|
8
|
-
from __future__ import division
|
9
|
-
from __future__ import print_function
|
10
|
-
from __future__ import unicode_literals
|
11
|
-
|
12
|
-
from .FastText import train_supervised
|
13
|
-
from .FastText import train_unsupervised
|
14
|
-
from .FastText import load_model
|
15
|
-
from .FastText import tokenize
|
16
|
-
from .FastText import EOS
|
17
|
-
from .FastText import BOW
|
18
|
-
from .FastText import EOW
|
19
|
-
|
20
|
-
from .FastText import cbow
|
21
|
-
from .FastText import skipgram
|
22
|
-
from .FastText import supervised
|
@@ -1,388 +0,0 @@
|
|
1
|
-
/**
|
2
|
-
* Copyright (c) 2017-present, Facebook, Inc.
|
3
|
-
* All rights reserved.
|
4
|
-
*
|
5
|
-
* This source code is licensed under the MIT license found in the
|
6
|
-
* LICENSE file in the root directory of this source tree.
|
7
|
-
*/
|
8
|
-
|
9
|
-
#include <args.h>
|
10
|
-
#include <densematrix.h>
|
11
|
-
#include <fasttext.h>
|
12
|
-
#include <pybind11/pybind11.h>
|
13
|
-
#include <pybind11/stl.h>
|
14
|
-
#include <real.h>
|
15
|
-
#include <vector.h>
|
16
|
-
#include <cmath>
|
17
|
-
#include <iterator>
|
18
|
-
#include <sstream>
|
19
|
-
#include <stdexcept>
|
20
|
-
|
21
|
-
using namespace pybind11::literals;
|
22
|
-
namespace py = pybind11;
|
23
|
-
|
24
|
-
py::str castToPythonString(const std::string& s, const char* onUnicodeError) {
|
25
|
-
PyObject* handle = PyUnicode_DecodeUTF8(s.data(), s.length(), onUnicodeError);
|
26
|
-
if (!handle) {
|
27
|
-
throw py::error_already_set();
|
28
|
-
}
|
29
|
-
|
30
|
-
// py::str's constructor from a PyObject assumes the string has been encoded
|
31
|
-
// for python 2 and not encoded for python 3 :
|
32
|
-
// https://github.com/pybind/pybind11/blob/ccbe68b084806dece5863437a7dc93de20bd9b15/include/pybind11/pytypes.h#L930
|
33
|
-
#if PY_MAJOR_VERSION < 3
|
34
|
-
handle = PyUnicode_AsEncodedString(handle, "utf-8", onUnicodeError);
|
35
|
-
#endif
|
36
|
-
|
37
|
-
return py::str(handle);
|
38
|
-
}
|
39
|
-
|
40
|
-
std::pair<std::vector<py::str>, std::vector<py::str>> getLineText(
|
41
|
-
fasttext::FastText& m,
|
42
|
-
const std::string text,
|
43
|
-
const char* onUnicodeError) {
|
44
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
45
|
-
std::stringstream ioss(text);
|
46
|
-
std::string token;
|
47
|
-
std::vector<py::str> words;
|
48
|
-
std::vector<py::str> labels;
|
49
|
-
while (d->readWord(ioss, token)) {
|
50
|
-
uint32_t h = d->hash(token);
|
51
|
-
int32_t wid = d->getId(token, h);
|
52
|
-
fasttext::entry_type type = wid < 0 ? d->getType(token) : d->getType(wid);
|
53
|
-
|
54
|
-
if (type == fasttext::entry_type::word) {
|
55
|
-
words.push_back(castToPythonString(token, onUnicodeError));
|
56
|
-
// Labels must not be OOV!
|
57
|
-
} else if (type == fasttext::entry_type::label && wid >= 0) {
|
58
|
-
labels.push_back(castToPythonString(token, onUnicodeError));
|
59
|
-
}
|
60
|
-
if (token == fasttext::Dictionary::EOS)
|
61
|
-
break;
|
62
|
-
}
|
63
|
-
return std::pair<std::vector<py::str>, std::vector<py::str>>(words, labels);
|
64
|
-
}
|
65
|
-
|
66
|
-
PYBIND11_MODULE(fasttext_pybind, m) {
|
67
|
-
py::class_<fasttext::Args>(m, "args")
|
68
|
-
.def(py::init<>())
|
69
|
-
.def_readwrite("input", &fasttext::Args::input)
|
70
|
-
.def_readwrite("output", &fasttext::Args::output)
|
71
|
-
.def_readwrite("lr", &fasttext::Args::lr)
|
72
|
-
.def_readwrite("lrUpdateRate", &fasttext::Args::lrUpdateRate)
|
73
|
-
.def_readwrite("dim", &fasttext::Args::dim)
|
74
|
-
.def_readwrite("ws", &fasttext::Args::ws)
|
75
|
-
.def_readwrite("epoch", &fasttext::Args::epoch)
|
76
|
-
.def_readwrite("minCount", &fasttext::Args::minCount)
|
77
|
-
.def_readwrite("minCountLabel", &fasttext::Args::minCountLabel)
|
78
|
-
.def_readwrite("neg", &fasttext::Args::neg)
|
79
|
-
.def_readwrite("wordNgrams", &fasttext::Args::wordNgrams)
|
80
|
-
.def_readwrite("loss", &fasttext::Args::loss)
|
81
|
-
.def_readwrite("model", &fasttext::Args::model)
|
82
|
-
.def_readwrite("bucket", &fasttext::Args::bucket)
|
83
|
-
.def_readwrite("minn", &fasttext::Args::minn)
|
84
|
-
.def_readwrite("maxn", &fasttext::Args::maxn)
|
85
|
-
.def_readwrite("thread", &fasttext::Args::thread)
|
86
|
-
.def_readwrite("t", &fasttext::Args::t)
|
87
|
-
.def_readwrite("label", &fasttext::Args::label)
|
88
|
-
.def_readwrite("verbose", &fasttext::Args::verbose)
|
89
|
-
.def_readwrite("pretrainedVectors", &fasttext::Args::pretrainedVectors)
|
90
|
-
.def_readwrite("saveOutput", &fasttext::Args::saveOutput)
|
91
|
-
|
92
|
-
.def_readwrite("qout", &fasttext::Args::qout)
|
93
|
-
.def_readwrite("retrain", &fasttext::Args::retrain)
|
94
|
-
.def_readwrite("qnorm", &fasttext::Args::qnorm)
|
95
|
-
.def_readwrite("cutoff", &fasttext::Args::cutoff)
|
96
|
-
.def_readwrite("dsub", &fasttext::Args::dsub);
|
97
|
-
|
98
|
-
py::enum_<fasttext::model_name>(m, "model_name")
|
99
|
-
.value("cbow", fasttext::model_name::cbow)
|
100
|
-
.value("skipgram", fasttext::model_name::sg)
|
101
|
-
.value("supervised", fasttext::model_name::sup)
|
102
|
-
.export_values();
|
103
|
-
|
104
|
-
py::enum_<fasttext::loss_name>(m, "loss_name")
|
105
|
-
.value("hs", fasttext::loss_name::hs)
|
106
|
-
.value("ns", fasttext::loss_name::ns)
|
107
|
-
.value("softmax", fasttext::loss_name::softmax)
|
108
|
-
.value("ova", fasttext::loss_name::ova)
|
109
|
-
.export_values();
|
110
|
-
|
111
|
-
m.def(
|
112
|
-
"train",
|
113
|
-
[](fasttext::FastText& ft, fasttext::Args& a) { ft.train(a); },
|
114
|
-
py::call_guard<py::gil_scoped_release>());
|
115
|
-
|
116
|
-
py::class_<fasttext::Vector>(m, "Vector", py::buffer_protocol())
|
117
|
-
.def(py::init<ssize_t>())
|
118
|
-
.def_buffer([](fasttext::Vector& m) -> py::buffer_info {
|
119
|
-
return py::buffer_info(
|
120
|
-
m.data(),
|
121
|
-
sizeof(fasttext::real),
|
122
|
-
py::format_descriptor<fasttext::real>::format(),
|
123
|
-
1,
|
124
|
-
{m.size()},
|
125
|
-
{sizeof(fasttext::real)});
|
126
|
-
});
|
127
|
-
|
128
|
-
py::class_<fasttext::DenseMatrix>(
|
129
|
-
m, "DenseMatrix", py::buffer_protocol(), py::module_local())
|
130
|
-
.def(py::init<>())
|
131
|
-
.def(py::init<ssize_t, ssize_t>())
|
132
|
-
.def_buffer([](fasttext::DenseMatrix& m) -> py::buffer_info {
|
133
|
-
return py::buffer_info(
|
134
|
-
m.data(),
|
135
|
-
sizeof(fasttext::real),
|
136
|
-
py::format_descriptor<fasttext::real>::format(),
|
137
|
-
2,
|
138
|
-
{m.size(0), m.size(1)},
|
139
|
-
{sizeof(fasttext::real) * m.size(1),
|
140
|
-
sizeof(fasttext::real) * (int64_t)1});
|
141
|
-
});
|
142
|
-
|
143
|
-
py::class_<fasttext::FastText>(m, "fasttext")
|
144
|
-
.def(py::init<>())
|
145
|
-
.def("getArgs", &fasttext::FastText::getArgs)
|
146
|
-
.def(
|
147
|
-
"getInputMatrix",
|
148
|
-
[](fasttext::FastText& m) {
|
149
|
-
std::shared_ptr<const fasttext::DenseMatrix> mm =
|
150
|
-
m.getInputMatrix();
|
151
|
-
return *mm.get();
|
152
|
-
})
|
153
|
-
.def(
|
154
|
-
"getOutputMatrix",
|
155
|
-
[](fasttext::FastText& m) {
|
156
|
-
std::shared_ptr<const fasttext::DenseMatrix> mm =
|
157
|
-
m.getOutputMatrix();
|
158
|
-
return *mm.get();
|
159
|
-
})
|
160
|
-
.def(
|
161
|
-
"loadModel",
|
162
|
-
[](fasttext::FastText& m, std::string s) { m.loadModel(s); })
|
163
|
-
.def(
|
164
|
-
"saveModel",
|
165
|
-
[](fasttext::FastText& m, std::string s) { m.saveModel(s); })
|
166
|
-
.def(
|
167
|
-
"test",
|
168
|
-
[](fasttext::FastText& m, const std::string filename, int32_t k) {
|
169
|
-
std::ifstream ifs(filename);
|
170
|
-
if (!ifs.is_open()) {
|
171
|
-
throw std::invalid_argument("Test file cannot be opened!");
|
172
|
-
}
|
173
|
-
fasttext::Meter meter;
|
174
|
-
m.test(ifs, k, 0.0, meter);
|
175
|
-
ifs.close();
|
176
|
-
return std::tuple<int64_t, double, double>(
|
177
|
-
meter.nexamples(), meter.precision(), meter.recall());
|
178
|
-
})
|
179
|
-
.def(
|
180
|
-
"getSentenceVector",
|
181
|
-
[](fasttext::FastText& m,
|
182
|
-
fasttext::Vector& v,
|
183
|
-
const std::string text) {
|
184
|
-
std::stringstream ioss(text);
|
185
|
-
m.getSentenceVector(ioss, v);
|
186
|
-
})
|
187
|
-
.def(
|
188
|
-
"tokenize",
|
189
|
-
[](fasttext::FastText& m, const std::string text) {
|
190
|
-
std::vector<std::string> text_split;
|
191
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
192
|
-
std::stringstream ioss(text);
|
193
|
-
std::string token;
|
194
|
-
while (!ioss.eof()) {
|
195
|
-
while (d->readWord(ioss, token)) {
|
196
|
-
text_split.push_back(token);
|
197
|
-
}
|
198
|
-
}
|
199
|
-
return text_split;
|
200
|
-
})
|
201
|
-
.def("getLine", &getLineText)
|
202
|
-
.def(
|
203
|
-
"multilineGetLine",
|
204
|
-
[](fasttext::FastText& m,
|
205
|
-
const std::vector<std::string> lines,
|
206
|
-
const char* onUnicodeError) {
|
207
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
208
|
-
std::vector<std::vector<py::str>> all_words;
|
209
|
-
std::vector<std::vector<py::str>> all_labels;
|
210
|
-
for (const auto& text : lines) {
|
211
|
-
auto pair = getLineText(m, text, onUnicodeError);
|
212
|
-
all_words.push_back(pair.first);
|
213
|
-
all_labels.push_back(pair.second);
|
214
|
-
}
|
215
|
-
return std::pair<
|
216
|
-
std::vector<std::vector<py::str>>,
|
217
|
-
std::vector<std::vector<py::str>>>(all_words, all_labels);
|
218
|
-
})
|
219
|
-
.def(
|
220
|
-
"getVocab",
|
221
|
-
[](fasttext::FastText& m, const char* onUnicodeError) {
|
222
|
-
py::str s;
|
223
|
-
std::vector<py::str> vocab_list;
|
224
|
-
std::vector<int64_t> vocab_freq;
|
225
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
226
|
-
vocab_freq = d->getCounts(fasttext::entry_type::word);
|
227
|
-
for (int32_t i = 0; i < vocab_freq.size(); i++) {
|
228
|
-
vocab_list.push_back(
|
229
|
-
castToPythonString(d->getWord(i), onUnicodeError));
|
230
|
-
}
|
231
|
-
return std::pair<std::vector<py::str>, std::vector<int64_t>>(
|
232
|
-
vocab_list, vocab_freq);
|
233
|
-
})
|
234
|
-
.def(
|
235
|
-
"getLabels",
|
236
|
-
[](fasttext::FastText& m, const char* onUnicodeError) {
|
237
|
-
std::vector<py::str> labels_list;
|
238
|
-
std::vector<int64_t> labels_freq;
|
239
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
240
|
-
labels_freq = d->getCounts(fasttext::entry_type::label);
|
241
|
-
for (int32_t i = 0; i < labels_freq.size(); i++) {
|
242
|
-
labels_list.push_back(
|
243
|
-
castToPythonString(d->getLabel(i), onUnicodeError));
|
244
|
-
}
|
245
|
-
return std::pair<std::vector<py::str>, std::vector<int64_t>>(
|
246
|
-
labels_list, labels_freq);
|
247
|
-
})
|
248
|
-
.def(
|
249
|
-
"quantize",
|
250
|
-
[](fasttext::FastText& m,
|
251
|
-
const std::string input,
|
252
|
-
bool qout,
|
253
|
-
int32_t cutoff,
|
254
|
-
bool retrain,
|
255
|
-
int epoch,
|
256
|
-
double lr,
|
257
|
-
int thread,
|
258
|
-
int verbose,
|
259
|
-
int32_t dsub,
|
260
|
-
bool qnorm) {
|
261
|
-
fasttext::Args qa = fasttext::Args();
|
262
|
-
qa.input = input;
|
263
|
-
qa.qout = qout;
|
264
|
-
qa.cutoff = cutoff;
|
265
|
-
qa.retrain = retrain;
|
266
|
-
qa.epoch = epoch;
|
267
|
-
qa.lr = lr;
|
268
|
-
qa.thread = thread;
|
269
|
-
qa.verbose = verbose;
|
270
|
-
qa.dsub = dsub;
|
271
|
-
qa.qnorm = qnorm;
|
272
|
-
m.quantize(qa);
|
273
|
-
})
|
274
|
-
.def(
|
275
|
-
"predict",
|
276
|
-
// NOTE: text needs to end in a newline
|
277
|
-
// to exactly mimic the behavior of the cli
|
278
|
-
[](fasttext::FastText& m,
|
279
|
-
const std::string text,
|
280
|
-
int32_t k,
|
281
|
-
fasttext::real threshold,
|
282
|
-
const char* onUnicodeError) {
|
283
|
-
std::stringstream ioss(text);
|
284
|
-
std::vector<std::pair<fasttext::real, std::string>> predictions;
|
285
|
-
m.predictLine(ioss, predictions, k, threshold);
|
286
|
-
|
287
|
-
std::vector<std::pair<fasttext::real, py::str>>
|
288
|
-
transformedPredictions;
|
289
|
-
|
290
|
-
for (const auto& prediction : predictions) {
|
291
|
-
transformedPredictions.push_back(std::make_pair(
|
292
|
-
prediction.first,
|
293
|
-
castToPythonString(prediction.second, onUnicodeError)));
|
294
|
-
}
|
295
|
-
|
296
|
-
return transformedPredictions;
|
297
|
-
})
|
298
|
-
.def(
|
299
|
-
"multilinePredict",
|
300
|
-
// NOTE: text needs to end in a newline
|
301
|
-
// to exactly mimic the behavior of the cli
|
302
|
-
[](fasttext::FastText& m,
|
303
|
-
const std::vector<std::string>& lines,
|
304
|
-
int32_t k,
|
305
|
-
fasttext::real threshold,
|
306
|
-
const char* onUnicodeError) {
|
307
|
-
std::vector<std::vector<std::pair<fasttext::real, py::str>>>
|
308
|
-
allPredictions;
|
309
|
-
std::vector<std::pair<fasttext::real, std::string>> predictions;
|
310
|
-
|
311
|
-
for (const std::string& text : lines) {
|
312
|
-
std::stringstream ioss(text);
|
313
|
-
m.predictLine(ioss, predictions, k, threshold);
|
314
|
-
std::vector<std::pair<fasttext::real, py::str>>
|
315
|
-
transformedPredictions;
|
316
|
-
for (const auto& prediction : predictions) {
|
317
|
-
transformedPredictions.push_back(std::make_pair(
|
318
|
-
prediction.first,
|
319
|
-
castToPythonString(prediction.second, onUnicodeError)));
|
320
|
-
}
|
321
|
-
allPredictions.push_back(transformedPredictions);
|
322
|
-
}
|
323
|
-
return allPredictions;
|
324
|
-
})
|
325
|
-
.def(
|
326
|
-
"testLabel",
|
327
|
-
[](fasttext::FastText& m,
|
328
|
-
const std::string filename,
|
329
|
-
int32_t k,
|
330
|
-
fasttext::real threshold) {
|
331
|
-
std::ifstream ifs(filename);
|
332
|
-
if (!ifs.is_open()) {
|
333
|
-
throw std::invalid_argument("Test file cannot be opened!");
|
334
|
-
}
|
335
|
-
fasttext::Meter meter;
|
336
|
-
m.test(ifs, k, threshold, meter);
|
337
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
338
|
-
std::unordered_map<std::string, py::dict> returnedValue;
|
339
|
-
for (int32_t i = 0; i < d->nlabels(); i++) {
|
340
|
-
returnedValue[d->getLabel(i)] = py::dict(
|
341
|
-
"precision"_a = meter.precision(i),
|
342
|
-
"recall"_a = meter.recall(i),
|
343
|
-
"f1score"_a = meter.f1Score(i));
|
344
|
-
}
|
345
|
-
|
346
|
-
return returnedValue;
|
347
|
-
})
|
348
|
-
.def(
|
349
|
-
"getWordId",
|
350
|
-
[](fasttext::FastText& m, const std::string word) {
|
351
|
-
return m.getWordId(word);
|
352
|
-
})
|
353
|
-
.def(
|
354
|
-
"getSubwordId",
|
355
|
-
[](fasttext::FastText& m, const std::string word) {
|
356
|
-
return m.getSubwordId(word);
|
357
|
-
})
|
358
|
-
.def(
|
359
|
-
"getInputVector",
|
360
|
-
[](fasttext::FastText& m, fasttext::Vector& vec, int32_t ind) {
|
361
|
-
m.getInputVector(vec, ind);
|
362
|
-
})
|
363
|
-
.def(
|
364
|
-
"getWordVector",
|
365
|
-
[](fasttext::FastText& m,
|
366
|
-
fasttext::Vector& vec,
|
367
|
-
const std::string word) { m.getWordVector(vec, word); })
|
368
|
-
.def(
|
369
|
-
"getSubwords",
|
370
|
-
[](fasttext::FastText& m,
|
371
|
-
const std::string word,
|
372
|
-
const char* onUnicodeError) {
|
373
|
-
std::vector<std::string> subwords;
|
374
|
-
std::vector<int32_t> ngrams;
|
375
|
-
std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
|
376
|
-
d->getSubwords(word, ngrams, subwords);
|
377
|
-
std::vector<py::str> transformedSubwords;
|
378
|
-
|
379
|
-
for (const auto& subword : subwords) {
|
380
|
-
transformedSubwords.push_back(
|
381
|
-
castToPythonString(subword, onUnicodeError));
|
382
|
-
}
|
383
|
-
|
384
|
-
return std::pair<std::vector<py::str>, std::vector<int32_t>>(
|
385
|
-
transformedSubwords, ngrams);
|
386
|
-
})
|
387
|
-
.def("isQuant", [](fasttext::FastText& m) { return m.isQuant(); });
|
388
|
-
}
|
@@ -1,14 +0,0 @@
|
|
1
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the MIT license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
from __future__ import absolute_import
|
8
|
-
from __future__ import division
|
9
|
-
from __future__ import print_function
|
10
|
-
from __future__ import unicode_literals
|
11
|
-
|
12
|
-
from .test_configurations import get_supervised_models
|
13
|
-
from .test_script import gen_tests
|
14
|
-
from .test_script import gen_unit_tests
|
@@ -1,239 +0,0 @@
|
|
1
|
-
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
-
# All rights reserved.
|
3
|
-
#
|
4
|
-
# This source code is licensed under the MIT license found in the
|
5
|
-
# LICENSE file in the root directory of this source tree.
|
6
|
-
|
7
|
-
from __future__ import absolute_import
|
8
|
-
from __future__ import division
|
9
|
-
from __future__ import print_function
|
10
|
-
from __future__ import unicode_literals
|
11
|
-
|
12
|
-
import multiprocessing
|
13
|
-
|
14
|
-
# This script represents a collection of integration tests
|
15
|
-
# Each integration test comes with a full set of parameters,
|
16
|
-
# a dataset, and expected metrics.
|
17
|
-
# These configurations can be used by various fastText APIs
|
18
|
-
# to confirm some level of correctness.
|
19
|
-
|
20
|
-
|
21
|
-
def max_thread():
|
22
|
-
return multiprocessing.cpu_count() - 1
|
23
|
-
|
24
|
-
|
25
|
-
def check_supervised_configuration(configuration, verbose=1):
|
26
|
-
configuration["args"]["verbose"] = verbose
|
27
|
-
configuration["quant_args"]["verbose"] = verbose
|
28
|
-
return configuration
|
29
|
-
|
30
|
-
|
31
|
-
def check_supervised_configurations(configurations, verbose=1):
|
32
|
-
for i in range(len(configurations)):
|
33
|
-
configurations[i] = check_supervised_configuration(
|
34
|
-
configurations[i], verbose=verbose
|
35
|
-
)
|
36
|
-
return configurations
|
37
|
-
|
38
|
-
|
39
|
-
def flickr_job(thread=None):
|
40
|
-
if thread is None:
|
41
|
-
thread = max_thread()
|
42
|
-
config = {}
|
43
|
-
config["dataset"] = "YFCC100M"
|
44
|
-
config["args"] = {
|
45
|
-
"dim": 256,
|
46
|
-
"wordNgrams": 2,
|
47
|
-
"minCount": 10,
|
48
|
-
"bucket": 10000000,
|
49
|
-
"epoch": 20,
|
50
|
-
"loss": "hs",
|
51
|
-
"minCountLabel": 100,
|
52
|
-
"thread": thread
|
53
|
-
}
|
54
|
-
config["args"]["input"] = "YFCC100M/train"
|
55
|
-
config["quant_args"] = {
|
56
|
-
"dsub": 2,
|
57
|
-
"lr": 0.1,
|
58
|
-
"epoch": 5,
|
59
|
-
"cutoff": 100000,
|
60
|
-
"qnorm": True,
|
61
|
-
"retrain": True,
|
62
|
-
"qout": True
|
63
|
-
}
|
64
|
-
config["quant_args"]["input"] = config["args"]["input"]
|
65
|
-
config["test"] = {
|
66
|
-
"n": 647224,
|
67
|
-
"p1": 0.470,
|
68
|
-
"r1": 0.071,
|
69
|
-
"size": 12060039727,
|
70
|
-
"data": "YFCC100M/test",
|
71
|
-
}
|
72
|
-
# One quant example (to illustrate slack): 0.344, 0.0528, 64506972
|
73
|
-
config["quant_test"] = {
|
74
|
-
"n": 647224,
|
75
|
-
"p1": 0.300,
|
76
|
-
"r1": 0.0450,
|
77
|
-
"size": 70000000,
|
78
|
-
"data": "YFCC100M/test",
|
79
|
-
}
|
80
|
-
return config
|
81
|
-
|
82
|
-
|
83
|
-
def langid_job1(thread=None):
|
84
|
-
if thread is None:
|
85
|
-
thread = max_thread()
|
86
|
-
config = {}
|
87
|
-
config["dataset"] = "langid"
|
88
|
-
config["args"] = {"dim": 16, "minn": 2, "maxn": 4, "thread": thread}
|
89
|
-
config["args"]["input"] = "langid.train"
|
90
|
-
config["quant_args"] = {"qnorm": True, "cutoff": 50000, "retrain": True}
|
91
|
-
config["quant_args"]["input"] = config["args"]["input"]
|
92
|
-
config["test"] = {
|
93
|
-
"n": 10000,
|
94
|
-
"p1": 0.985,
|
95
|
-
"r1": 0.985,
|
96
|
-
"size": 368132610,
|
97
|
-
"data": "langid.valid",
|
98
|
-
}
|
99
|
-
# One quant example (to illustrate slack): 0.984 0.984 932793
|
100
|
-
config["quant_test"] = {
|
101
|
-
"p1": 0.97,
|
102
|
-
"r1": 0.97,
|
103
|
-
"size": 1000000,
|
104
|
-
}
|
105
|
-
config["quant_test"]["n"] = config["test"]["n"]
|
106
|
-
config["quant_test"]["data"] = config["test"]["data"]
|
107
|
-
return config
|
108
|
-
|
109
|
-
|
110
|
-
def langid_job2(thread=None):
|
111
|
-
if thread is None:
|
112
|
-
thread = max_thread()
|
113
|
-
config = langid_job1(thread).copy()
|
114
|
-
config["args"]["loss"] = "hs"
|
115
|
-
return config
|
116
|
-
|
117
|
-
|
118
|
-
def cooking_job1(thread=None):
|
119
|
-
if thread is None:
|
120
|
-
thread = max_thread()
|
121
|
-
config = {}
|
122
|
-
config["dataset"] = "cooking"
|
123
|
-
config["args"] = {
|
124
|
-
"epoch": 25,
|
125
|
-
"lr": 1.0,
|
126
|
-
"wordNgrams": 2,
|
127
|
-
"minCount": 1,
|
128
|
-
"thread": thread,
|
129
|
-
}
|
130
|
-
config["args"]["input"] = "cooking.train"
|
131
|
-
config["quant_args"] = {"qnorm": True, "cutoff": 50000, "retrain": True}
|
132
|
-
config["quant_args"]["input"] = config["args"]["input"]
|
133
|
-
config["test"] = {
|
134
|
-
"n": 3000,
|
135
|
-
"p1": 0.59,
|
136
|
-
"r1": 0.25,
|
137
|
-
"size": 804047585,
|
138
|
-
"data": "cooking.valid",
|
139
|
-
}
|
140
|
-
# One quant example (to illustrate slack): 0.602 0.26 3439172
|
141
|
-
config["quant_test"] = {
|
142
|
-
"p1": 0.55,
|
143
|
-
"r1": 0.20,
|
144
|
-
"size": 4000000,
|
145
|
-
}
|
146
|
-
config["quant_test"]["n"] = config["test"]["n"]
|
147
|
-
config["quant_test"]["data"] = config["test"]["data"]
|
148
|
-
return config
|
149
|
-
|
150
|
-
|
151
|
-
def cooking_job2(thread=None):
|
152
|
-
if thread is None:
|
153
|
-
thread = max_thread()
|
154
|
-
config = cooking_job1(thread).copy()
|
155
|
-
config["args"]["loss"] = "hs"
|
156
|
-
return config
|
157
|
-
|
158
|
-
|
159
|
-
# Supervised models
|
160
|
-
# See https://fasttext.cc/docs/en/supervised-models.html
|
161
|
-
def get_supervised_models(thread=None, verbose=1):
|
162
|
-
if thread is None:
|
163
|
-
thread = max_thread()
|
164
|
-
sup_job_dataset = [
|
165
|
-
"ag_news", "sogou_news", "dbpedia", "yelp_review_polarity",
|
166
|
-
"yelp_review_full", "yahoo_answers", "amazon_review_full",
|
167
|
-
"amazon_review_polarity"
|
168
|
-
]
|
169
|
-
|
170
|
-
sup_params = {
|
171
|
-
"dim": 10,
|
172
|
-
"wordNgrams": 2,
|
173
|
-
"minCount": 1,
|
174
|
-
"bucket": 10000000,
|
175
|
-
"epoch": 5,
|
176
|
-
"thread": thread,
|
177
|
-
"verbose": 1,
|
178
|
-
}
|
179
|
-
quant_params = {
|
180
|
-
"retrain": True,
|
181
|
-
"cutoff": 100000,
|
182
|
-
"qnorm": True,
|
183
|
-
"verbose": 1,
|
184
|
-
}
|
185
|
-
sup_job_lr = [0.25, 0.5, 0.5, 0.1, 0.1, 0.1, 0.05, 0.05]
|
186
|
-
|
187
|
-
sup_job_n = [7600, 60000, 70000, 38000, 50000, 60000, 650000, 400000]
|
188
|
-
|
189
|
-
sup_job_p1 = [0.915, 0.968, 0.983, 0.956, 0.638, 0.723, 0.600, 0.940]
|
190
|
-
sup_job_r1 = [0.915, 0.968, 0.983, 0.956, 0.638, 0.723, 0.600, 0.940]
|
191
|
-
sup_job_size = [
|
192
|
-
405607193, 421445471, 447481878, 427867393, 431292576, 517549567,
|
193
|
-
483742593, 493604598
|
194
|
-
]
|
195
|
-
|
196
|
-
sup_job_quant_p1 = [0.918, 0.965, 0.983, 0.950, 0.625, 0.707, 0.58, 0.920]
|
197
|
-
sup_job_quant_r1 = [0.918, 0.965, 0.983, 0.950, 0.625, 0.707, 0.58, 0.920]
|
198
|
-
sup_job_quant_size = [
|
199
|
-
1600000, 1500000, 1700000, 1600000, 1600000, 1700000, 1600000, 1600000
|
200
|
-
]
|
201
|
-
|
202
|
-
configurations = []
|
203
|
-
for i in range(len(sup_job_dataset)):
|
204
|
-
configuration = {}
|
205
|
-
configuration["dataset"] = sup_job_dataset[i]
|
206
|
-
args = sup_params.copy()
|
207
|
-
quant_args = quant_params.copy()
|
208
|
-
args["lr"] = sup_job_lr[i]
|
209
|
-
args["input"] = sup_job_dataset[i] + ".train"
|
210
|
-
quant_args["lr"] = sup_job_lr[i]
|
211
|
-
quant_args["input"] = sup_job_dataset[i] + ".train"
|
212
|
-
configuration["args"] = args
|
213
|
-
configuration["quant_args"] = quant_args
|
214
|
-
test = {
|
215
|
-
"n": sup_job_n[i],
|
216
|
-
"p1": sup_job_p1[i],
|
217
|
-
"r1": sup_job_r1[i],
|
218
|
-
"size": sup_job_size[i],
|
219
|
-
"data": sup_job_dataset[i] + ".test",
|
220
|
-
}
|
221
|
-
quant_test = {
|
222
|
-
"n": sup_job_n[i],
|
223
|
-
"p1": sup_job_quant_p1[i],
|
224
|
-
"r1": sup_job_quant_r1[i],
|
225
|
-
"size": sup_job_quant_size[i],
|
226
|
-
"data": sup_job_dataset[i] + ".test",
|
227
|
-
}
|
228
|
-
configuration["test"] = test
|
229
|
-
configuration["quant_test"] = quant_test
|
230
|
-
configurations.append(configuration)
|
231
|
-
configurations.append(flickr_job())
|
232
|
-
configurations.append(langid_job1())
|
233
|
-
configurations.append(langid_job2())
|
234
|
-
configurations.append(cooking_job1())
|
235
|
-
configurations.append(cooking_job2())
|
236
|
-
configurations = check_supervised_configurations(
|
237
|
-
configurations, verbose=verbose
|
238
|
-
)
|
239
|
-
return configurations
|