fasttext 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/CHANGELOG.md +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +251 -0
- data/ext/fasttext/ext.cpp +291 -0
- data/ext/fasttext/extconf.rb +15 -0
- data/lib/fasttext.rb +41 -0
- data/lib/fasttext/classifier.rb +92 -0
- data/lib/fasttext/ext.bundle +0 -0
- data/lib/fasttext/model.rb +60 -0
- data/lib/fasttext/vectorizer.rb +58 -0
- data/lib/fasttext/version.rb +3 -0
- data/vendor/fastText/CMakeLists.txt +68 -0
- data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
- data/vendor/fastText/CONTRIBUTING.md +32 -0
- data/vendor/fastText/LICENSE +21 -0
- data/vendor/fastText/MANIFEST.in +5 -0
- data/vendor/fastText/Makefile +63 -0
- data/vendor/fastText/README.md +339 -0
- data/vendor/fastText/alignment/README.md +53 -0
- data/vendor/fastText/alignment/align.py +145 -0
- data/vendor/fastText/alignment/eval.py +60 -0
- data/vendor/fastText/alignment/example.sh +51 -0
- data/vendor/fastText/alignment/unsup_align.py +109 -0
- data/vendor/fastText/alignment/utils.py +154 -0
- data/vendor/fastText/classification-example.sh +41 -0
- data/vendor/fastText/classification-results.sh +94 -0
- data/vendor/fastText/crawl/README.md +26 -0
- data/vendor/fastText/crawl/dedup.cc +51 -0
- data/vendor/fastText/crawl/download_crawl.sh +57 -0
- data/vendor/fastText/crawl/filter_dedup.sh +13 -0
- data/vendor/fastText/crawl/filter_utf8.cc +105 -0
- data/vendor/fastText/crawl/process_wet_file.sh +30 -0
- data/vendor/fastText/docs/aligned-vectors.md +64 -0
- data/vendor/fastText/docs/api.md +6 -0
- data/vendor/fastText/docs/cheatsheet.md +66 -0
- data/vendor/fastText/docs/crawl-vectors.md +125 -0
- data/vendor/fastText/docs/dataset.md +6 -0
- data/vendor/fastText/docs/english-vectors.md +53 -0
- data/vendor/fastText/docs/faqs.md +63 -0
- data/vendor/fastText/docs/language-identification.md +47 -0
- data/vendor/fastText/docs/options.md +50 -0
- data/vendor/fastText/docs/pretrained-vectors.md +142 -0
- data/vendor/fastText/docs/python-module.md +314 -0
- data/vendor/fastText/docs/references.md +41 -0
- data/vendor/fastText/docs/supervised-models.md +54 -0
- data/vendor/fastText/docs/supervised-tutorial.md +349 -0
- data/vendor/fastText/docs/support.md +58 -0
- data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
- data/vendor/fastText/eval.py +95 -0
- data/vendor/fastText/get-wikimedia.sh +79 -0
- data/vendor/fastText/python/README.md +322 -0
- data/vendor/fastText/python/README.rst +406 -0
- data/vendor/fastText/python/benchmarks/README.rst +3 -0
- data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
- data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
- data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
- data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
- data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
- data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
- data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
- data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
- data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
- data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
- data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
- data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
- data/vendor/fastText/quantization-example.sh +40 -0
- data/vendor/fastText/runtests.py +60 -0
- data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
- data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
- data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
- data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
- data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
- data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
- data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
- data/vendor/fastText/setup.cfg +2 -0
- data/vendor/fastText/setup.py +203 -0
- data/vendor/fastText/src/args.cc +320 -0
- data/vendor/fastText/src/args.h +68 -0
- data/vendor/fastText/src/densematrix.cc +155 -0
- data/vendor/fastText/src/densematrix.h +75 -0
- data/vendor/fastText/src/dictionary.cc +540 -0
- data/vendor/fastText/src/dictionary.h +111 -0
- data/vendor/fastText/src/fasttext.cc +821 -0
- data/vendor/fastText/src/fasttext.h +191 -0
- data/vendor/fastText/src/loss.cc +346 -0
- data/vendor/fastText/src/loss.h +163 -0
- data/vendor/fastText/src/main.cc +435 -0
- data/vendor/fastText/src/matrix.cc +25 -0
- data/vendor/fastText/src/matrix.h +44 -0
- data/vendor/fastText/src/meter.cc +68 -0
- data/vendor/fastText/src/meter.h +69 -0
- data/vendor/fastText/src/model.cc +98 -0
- data/vendor/fastText/src/model.h +79 -0
- data/vendor/fastText/src/productquantizer.cc +251 -0
- data/vendor/fastText/src/productquantizer.h +63 -0
- data/vendor/fastText/src/quantmatrix.cc +117 -0
- data/vendor/fastText/src/quantmatrix.h +60 -0
- data/vendor/fastText/src/real.h +15 -0
- data/vendor/fastText/src/utils.cc +28 -0
- data/vendor/fastText/src/utils.h +43 -0
- data/vendor/fastText/src/vector.cc +97 -0
- data/vendor/fastText/src/vector.h +61 -0
- data/vendor/fastText/tests/fetch_test_data.sh +202 -0
- data/vendor/fastText/website/README.md +6 -0
- data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
- data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
- data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
- data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
- data/vendor/fastText/website/core/Footer.js +127 -0
- data/vendor/fastText/website/package.json +12 -0
- data/vendor/fastText/website/pages/en/index.js +286 -0
- data/vendor/fastText/website/sidebars.json +18 -0
- data/vendor/fastText/website/siteConfig.js +102 -0
- data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
- data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
- data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
- data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
- data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
- data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
- data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
- data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
- data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
- data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
- data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
- data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
- data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
- data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
- data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
- data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
- data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
- data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
- data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
- data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
- data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
- data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
- data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
- data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
- data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
- data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
- data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
- data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
- data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
- data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
- data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
- data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
- data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
- data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
- data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
- data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
- data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
- data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
- data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
- data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
- data/vendor/fastText/website/static/fasttext.css +48 -0
- data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
- data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
- data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
- data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
- data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
- data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
- data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
- data/vendor/fastText/website/static/img/logo-color.png +0 -0
- data/vendor/fastText/website/static/img/model-black.png +0 -0
- data/vendor/fastText/website/static/img/model-blue.png +0 -0
- data/vendor/fastText/website/static/img/model-red.png +0 -0
- data/vendor/fastText/website/static/img/ogimage.png +0 -0
- data/vendor/fastText/website/static/img/oss_logo.png +0 -0
- data/vendor/fastText/wikifil.pl +57 -0
- data/vendor/fastText/word-vector-example.sh +39 -0
- metadata +621 -0
@@ -0,0 +1,22 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from __future__ import absolute_import
|
8
|
+
from __future__ import division
|
9
|
+
from __future__ import print_function
|
10
|
+
from __future__ import unicode_literals
|
11
|
+
|
12
|
+
from .FastText import train_supervised
|
13
|
+
from .FastText import train_unsupervised
|
14
|
+
from .FastText import load_model
|
15
|
+
from .FastText import tokenize
|
16
|
+
from .FastText import EOS
|
17
|
+
from .FastText import BOW
|
18
|
+
from .FastText import EOW
|
19
|
+
|
20
|
+
from .FastText import cbow
|
21
|
+
from .FastText import skipgram
|
22
|
+
from .FastText import supervised
|
@@ -0,0 +1,388 @@
|
|
1
|
+
/**
|
2
|
+
* Copyright (c) 2017-present, Facebook, Inc.
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* This source code is licensed under the MIT license found in the
|
6
|
+
* LICENSE file in the root directory of this source tree.
|
7
|
+
*/
|
8
|
+
|
9
|
+
#include <args.h>
|
10
|
+
#include <densematrix.h>
|
11
|
+
#include <fasttext.h>
|
12
|
+
#include <pybind11/pybind11.h>
|
13
|
+
#include <pybind11/stl.h>
|
14
|
+
#include <real.h>
|
15
|
+
#include <vector.h>
|
16
|
+
#include <cmath>
|
17
|
+
#include <iterator>
|
18
|
+
#include <sstream>
|
19
|
+
#include <stdexcept>
|
20
|
+
|
21
|
+
using namespace pybind11::literals;
|
22
|
+
namespace py = pybind11;
|
23
|
+
|
24
|
+
// Converts a UTF-8 encoded std::string into a Python string object.
//
// `onUnicodeError` is forwarded unchanged to the CPython codec functions as
// their `errors` argument. Throws py::error_already_set if decoding (or, on
// Python 2, re-encoding) fails.
py::str castToPythonString(const std::string& s, const char* onUnicodeError) {
  PyObject* handle = PyUnicode_DecodeUTF8(s.data(), s.length(), onUnicodeError);
  if (!handle) {
    throw py::error_already_set();
  }

  // py::str's constructor from a PyObject assumes the string has been encoded
  // for python 2 and not encoded for python 3 :
  // https://github.com/pybind/pybind11/blob/ccbe68b084806dece5863437a7dc93de20bd9b15/include/pybind11/pytypes.h#L930
#if PY_MAJOR_VERSION < 3
  PyObject* encoded =
      PyUnicode_AsEncodedString(handle, "utf-8", onUnicodeError);
  // The decoded object is no longer needed once it has been re-encoded;
  // release it so it is not leaked when `handle` is re-pointed below.
  Py_DECREF(handle);
  if (!encoded) {
    throw py::error_already_set();
  }
  handle = encoded;
#endif

  // py::str(handle) takes its own reference to the result, so the reference
  // we own from the C API call above must be released explicitly.
  py::str result = py::str(handle);
  Py_DECREF(handle);
  return result;
}
|
39
|
+
|
40
|
+
// Tokenizes `text` with the model's dictionary and returns the tokens split
// into (words, labels), each converted to a Python string.
//
// Reading stops at the end of the input or right after the first EOS token.
// A token classified as a label is only reported when it is present in the
// dictionary (id >= 0); tokens classified as words are always reported.
std::pair<std::vector<py::str>, std::vector<py::str>> getLineText(
    fasttext::FastText& m,
    const std::string text,
    const char* onUnicodeError) {
  std::vector<py::str> words;
  std::vector<py::str> labels;

  std::shared_ptr<const fasttext::Dictionary> dict = m.getDictionary();
  std::stringstream stream(text);
  std::string token;

  while (dict->readWord(stream, token)) {
    const int32_t id = dict->getId(token, dict->hash(token));
    // Unknown tokens (negative id) are classified from their text instead of
    // from their dictionary entry.
    const fasttext::entry_type kind =
        id >= 0 ? dict->getType(id) : dict->getType(token);

    const bool isWord = kind == fasttext::entry_type::word;
    // Labels must not be OOV!
    const bool isKnownLabel = kind == fasttext::entry_type::label && id >= 0;

    if (isWord) {
      words.push_back(castToPythonString(token, onUnicodeError));
    } else if (isKnownLabel) {
      labels.push_back(castToPythonString(token, onUnicodeError));
    }

    if (token == fasttext::Dictionary::EOS) {
      break;
    }
  }

  return std::make_pair(words, labels);
}
|
65
|
+
|
66
|
+
// Python extension module `fasttext_pybind`.
//
// Exposes the training arguments (`args`), the `model_name` and `loss_name`
// enums, a free `train` function, buffer-protocol views over `Vector` and
// `DenseMatrix`, and the `fasttext` model class with its query methods.
PYBIND11_MODULE(fasttext_pybind, m) {
  // Plain read/write access to every field of fasttext::Args.
  py::class_<fasttext::Args>(m, "args")
      .def(py::init<>())
      .def_readwrite("input", &fasttext::Args::input)
      .def_readwrite("output", &fasttext::Args::output)
      .def_readwrite("lr", &fasttext::Args::lr)
      .def_readwrite("lrUpdateRate", &fasttext::Args::lrUpdateRate)
      .def_readwrite("dim", &fasttext::Args::dim)
      .def_readwrite("ws", &fasttext::Args::ws)
      .def_readwrite("epoch", &fasttext::Args::epoch)
      .def_readwrite("minCount", &fasttext::Args::minCount)
      .def_readwrite("minCountLabel", &fasttext::Args::minCountLabel)
      .def_readwrite("neg", &fasttext::Args::neg)
      .def_readwrite("wordNgrams", &fasttext::Args::wordNgrams)
      .def_readwrite("loss", &fasttext::Args::loss)
      .def_readwrite("model", &fasttext::Args::model)
      .def_readwrite("bucket", &fasttext::Args::bucket)
      .def_readwrite("minn", &fasttext::Args::minn)
      .def_readwrite("maxn", &fasttext::Args::maxn)
      .def_readwrite("thread", &fasttext::Args::thread)
      .def_readwrite("t", &fasttext::Args::t)
      .def_readwrite("label", &fasttext::Args::label)
      .def_readwrite("verbose", &fasttext::Args::verbose)
      .def_readwrite("pretrainedVectors", &fasttext::Args::pretrainedVectors)
      .def_readwrite("saveOutput", &fasttext::Args::saveOutput)

      .def_readwrite("qout", &fasttext::Args::qout)
      .def_readwrite("retrain", &fasttext::Args::retrain)
      .def_readwrite("qnorm", &fasttext::Args::qnorm)
      .def_readwrite("cutoff", &fasttext::Args::cutoff)
      .def_readwrite("dsub", &fasttext::Args::dsub);

  py::enum_<fasttext::model_name>(m, "model_name")
      .value("cbow", fasttext::model_name::cbow)
      .value("skipgram", fasttext::model_name::sg)
      .value("supervised", fasttext::model_name::sup)
      .export_values();

  py::enum_<fasttext::loss_name>(m, "loss_name")
      .value("hs", fasttext::loss_name::hs)
      .value("ns", fasttext::loss_name::ns)
      .value("softmax", fasttext::loss_name::softmax)
      .value("ova", fasttext::loss_name::ova)
      .export_values();

  // Training runs with the GIL released (see the call_guard).
  m.def(
      "train",
      [](fasttext::FastText& ft, fasttext::Args& a) { ft.train(a); },
      py::call_guard<py::gil_scoped_release>());

  // One-dimensional buffer view over the vector's storage.
  py::class_<fasttext::Vector>(m, "Vector", py::buffer_protocol())
      .def(py::init<ssize_t>())
      .def_buffer([](fasttext::Vector& m) -> py::buffer_info {
        return py::buffer_info(
            m.data(),
            sizeof(fasttext::real),
            py::format_descriptor<fasttext::real>::format(),
            1,
            {m.size()},
            {sizeof(fasttext::real)});
      });

  // Two-dimensional buffer view; the strides describe a row-major layout.
  py::class_<fasttext::DenseMatrix>(
      m, "DenseMatrix", py::buffer_protocol(), py::module_local())
      .def(py::init<>())
      .def(py::init<ssize_t, ssize_t>())
      .def_buffer([](fasttext::DenseMatrix& m) -> py::buffer_info {
        return py::buffer_info(
            m.data(),
            sizeof(fasttext::real),
            py::format_descriptor<fasttext::real>::format(),
            2,
            {m.size(0), m.size(1)},
            {sizeof(fasttext::real) * m.size(1),
             sizeof(fasttext::real) * (int64_t)1});
      });

  py::class_<fasttext::FastText>(m, "fasttext")
      .def(py::init<>())
      .def("getArgs", &fasttext::FastText::getArgs)
      .def(
          "getInputMatrix",
          [](fasttext::FastText& m) {
            std::shared_ptr<const fasttext::DenseMatrix> mm =
                m.getInputMatrix();
            // Returned by value: Python receives a copy of the matrix.
            return *mm.get();
          })
      .def(
          "getOutputMatrix",
          [](fasttext::FastText& m) {
            std::shared_ptr<const fasttext::DenseMatrix> mm =
                m.getOutputMatrix();
            // Returned by value: Python receives a copy of the matrix.
            return *mm.get();
          })
      .def(
          "loadModel",
          [](fasttext::FastText& m, std::string s) { m.loadModel(s); })
      .def(
          "saveModel",
          [](fasttext::FastText& m, std::string s) { m.saveModel(s); })
      .def(
          "test",
          // Returns (number of examples, precision, recall) for `filename`.
          [](fasttext::FastText& m, const std::string filename, int32_t k) {
            std::ifstream ifs(filename);
            if (!ifs.is_open()) {
              throw std::invalid_argument("Test file cannot be opened!");
            }
            fasttext::Meter meter;
            m.test(ifs, k, 0.0, meter);
            ifs.close();
            return std::tuple<int64_t, double, double>(
                meter.nexamples(), meter.precision(), meter.recall());
          })
      .def(
          "getSentenceVector",
          [](fasttext::FastText& m,
             fasttext::Vector& v,
             const std::string text) {
            std::stringstream ioss(text);
            m.getSentenceVector(ioss, v);
          })
      .def(
          "tokenize",
          [](fasttext::FastText& m, const std::string text) {
            std::vector<std::string> text_split;
            std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
            std::stringstream ioss(text);
            std::string token;
            while (!ioss.eof()) {
              while (d->readWord(ioss, token)) {
                text_split.push_back(token);
              }
            }
            return text_split;
          })
      .def("getLine", &getLineText)
      .def(
          "multilineGetLine",
          // Applies getLineText to every line and returns the per-line word
          // lists and per-line label lists as two parallel vectors.
          [](fasttext::FastText& m,
             const std::vector<std::string> lines,
             const char* onUnicodeError) {
            std::vector<std::vector<py::str>> all_words;
            std::vector<std::vector<py::str>> all_labels;
            all_words.reserve(lines.size());
            all_labels.reserve(lines.size());
            for (const auto& text : lines) {
              auto pair = getLineText(m, text, onUnicodeError);
              // `pair` is a local temporary: move its halves out instead of
              // copying them.
              all_words.push_back(std::move(pair.first));
              all_labels.push_back(std::move(pair.second));
            }
            return std::pair<
                std::vector<std::vector<py::str>>,
                std::vector<std::vector<py::str>>>(
                std::move(all_words), std::move(all_labels));
          })
      .def(
          "getVocab",
          // Returns (words, counts); entry i of each belongs to word id i.
          [](fasttext::FastText& m, const char* onUnicodeError) {
            std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
            std::vector<int64_t> vocab_freq =
                d->getCounts(fasttext::entry_type::word);
            std::vector<py::str> vocab_list;
            vocab_list.reserve(vocab_freq.size());
            // getWord takes a signed 32-bit id, so convert the bound once
            // rather than comparing signed against unsigned each iteration.
            const int32_t nwords = static_cast<int32_t>(vocab_freq.size());
            for (int32_t i = 0; i < nwords; i++) {
              vocab_list.push_back(
                  castToPythonString(d->getWord(i), onUnicodeError));
            }
            return std::pair<std::vector<py::str>, std::vector<int64_t>>(
                std::move(vocab_list), std::move(vocab_freq));
          })
      .def(
          "getLabels",
          // Returns (labels, counts); entry i of each belongs to label id i.
          [](fasttext::FastText& m, const char* onUnicodeError) {
            std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
            std::vector<int64_t> labels_freq =
                d->getCounts(fasttext::entry_type::label);
            std::vector<py::str> labels_list;
            labels_list.reserve(labels_freq.size());
            const int32_t nlabels = static_cast<int32_t>(labels_freq.size());
            for (int32_t i = 0; i < nlabels; i++) {
              labels_list.push_back(
                  castToPythonString(d->getLabel(i), onUnicodeError));
            }
            return std::pair<std::vector<py::str>, std::vector<int64_t>>(
                std::move(labels_list), std::move(labels_freq));
          })
      .def(
          "quantize",
          // Builds a fresh Args from the given values (all other fields keep
          // their defaults) and quantizes the model with it.
          [](fasttext::FastText& m,
             const std::string input,
             bool qout,
             int32_t cutoff,
             bool retrain,
             int epoch,
             double lr,
             int thread,
             int verbose,
             int32_t dsub,
             bool qnorm) {
            fasttext::Args qa = fasttext::Args();
            qa.input = input;
            qa.qout = qout;
            qa.cutoff = cutoff;
            qa.retrain = retrain;
            qa.epoch = epoch;
            qa.lr = lr;
            qa.thread = thread;
            qa.verbose = verbose;
            qa.dsub = dsub;
            qa.qnorm = qnorm;
            m.quantize(qa);
          })
      .def(
          "predict",
          // NOTE: text needs to end in a newline
          // to exactly mimic the behavior of the cli
          [](fasttext::FastText& m,
             const std::string text,
             int32_t k,
             fasttext::real threshold,
             const char* onUnicodeError) {
            std::stringstream ioss(text);
            std::vector<std::pair<fasttext::real, std::string>> predictions;
            m.predictLine(ioss, predictions, k, threshold);

            std::vector<std::pair<fasttext::real, py::str>>
                transformedPredictions;
            transformedPredictions.reserve(predictions.size());

            for (const auto& prediction : predictions) {
              transformedPredictions.push_back(std::make_pair(
                  prediction.first,
                  castToPythonString(prediction.second, onUnicodeError)));
            }

            return transformedPredictions;
          })
      .def(
          "multilinePredict",
          // NOTE: text needs to end in a newline
          // to exactly mimic the behavior of the cli
          [](fasttext::FastText& m,
             const std::vector<std::string>& lines,
             int32_t k,
             fasttext::real threshold,
             const char* onUnicodeError) {
            std::vector<std::vector<std::pair<fasttext::real, py::str>>>
                allPredictions;
            allPredictions.reserve(lines.size());
            std::vector<std::pair<fasttext::real, std::string>> predictions;

            for (const std::string& text : lines) {
              std::stringstream ioss(text);
              m.predictLine(ioss, predictions, k, threshold);
              std::vector<std::pair<fasttext::real, py::str>>
                  transformedPredictions;
              transformedPredictions.reserve(predictions.size());
              for (const auto& prediction : predictions) {
                transformedPredictions.push_back(std::make_pair(
                    prediction.first,
                    castToPythonString(prediction.second, onUnicodeError)));
              }
              allPredictions.push_back(std::move(transformedPredictions));
            }
            return allPredictions;
          })
      .def(
          "testLabel",
          // Returns a map from label text to a dict holding that label's
          // "precision", "recall" and "f1score" on `filename`.
          [](fasttext::FastText& m,
             const std::string filename,
             int32_t k,
             fasttext::real threshold) {
            std::ifstream ifs(filename);
            if (!ifs.is_open()) {
              throw std::invalid_argument("Test file cannot be opened!");
            }
            fasttext::Meter meter;
            m.test(ifs, k, threshold, meter);
            std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
            std::unordered_map<std::string, py::dict> returnedValue;
            for (int32_t i = 0; i < d->nlabels(); i++) {
              returnedValue[d->getLabel(i)] = py::dict(
                  "precision"_a = meter.precision(i),
                  "recall"_a = meter.recall(i),
                  "f1score"_a = meter.f1Score(i));
            }

            return returnedValue;
          })
      .def(
          "getWordId",
          [](fasttext::FastText& m, const std::string word) {
            return m.getWordId(word);
          })
      .def(
          "getSubwordId",
          [](fasttext::FastText& m, const std::string word) {
            return m.getSubwordId(word);
          })
      .def(
          "getInputVector",
          [](fasttext::FastText& m, fasttext::Vector& vec, int32_t ind) {
            m.getInputVector(vec, ind);
          })
      .def(
          "getWordVector",
          [](fasttext::FastText& m,
             fasttext::Vector& vec,
             const std::string word) { m.getWordVector(vec, word); })
      .def(
          "getSubwords",
          // Returns (subword strings, subword ids) for `word`.
          [](fasttext::FastText& m,
             const std::string word,
             const char* onUnicodeError) {
            std::vector<std::string> subwords;
            std::vector<int32_t> ngrams;
            std::shared_ptr<const fasttext::Dictionary> d = m.getDictionary();
            d->getSubwords(word, ngrams, subwords);
            std::vector<py::str> transformedSubwords;
            transformedSubwords.reserve(subwords.size());

            for (const auto& subword : subwords) {
              transformedSubwords.push_back(
                  castToPythonString(subword, onUnicodeError));
            }

            return std::pair<std::vector<py::str>, std::vector<int32_t>>(
                std::move(transformedSubwords), std::move(ngrams));
          })
      .def("isQuant", [](fasttext::FastText& m) { return m.isQuant(); });
}
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from __future__ import absolute_import
|
8
|
+
from __future__ import division
|
9
|
+
from __future__ import print_function
|
10
|
+
from __future__ import unicode_literals
|
11
|
+
|
12
|
+
from .test_configurations import get_supervised_models
|
13
|
+
from .test_script import gen_tests
|
14
|
+
from .test_script import gen_unit_tests
|
@@ -0,0 +1,239 @@
|
|
1
|
+
# Copyright (c) 2017-present, Facebook, Inc.
|
2
|
+
# All rights reserved.
|
3
|
+
#
|
4
|
+
# This source code is licensed under the MIT license found in the
|
5
|
+
# LICENSE file in the root directory of this source tree.
|
6
|
+
|
7
|
+
from __future__ import absolute_import
|
8
|
+
from __future__ import division
|
9
|
+
from __future__ import print_function
|
10
|
+
from __future__ import unicode_literals
|
11
|
+
|
12
|
+
import multiprocessing
|
13
|
+
|
14
|
+
# This script represents a collection of integration tests
|
15
|
+
# Each integration test comes with a full set of parameters,
|
16
|
+
# a dataset, and expected metrics.
|
17
|
+
# These configurations can be used by various fastText APIs
|
18
|
+
# to confirm some level of correctness.
|
19
|
+
|
20
|
+
|
21
|
+
def max_thread():
    """Return the default number of worker threads for a job.

    One CPU is left free, but the result is never lower than 1 so that a
    single-CPU machine does not end up with a thread count of zero.
    """
    return max(1, multiprocessing.cpu_count() - 1)
|
23
|
+
|
24
|
+
|
25
|
+
def check_supervised_configuration(configuration, verbose=1):
    """Set the verbosity level of one supervised configuration.

    ``verbose`` is written into both the ``"args"`` and ``"quant_args"``
    sub-dictionaries, which are modified in place. The same
    ``configuration`` object is returned.
    """
    for section in ("args", "quant_args"):
        configuration[section]["verbose"] = verbose
    return configuration
|
29
|
+
|
30
|
+
|
31
|
+
def check_supervised_configurations(configurations, verbose=1):
    """Set the verbosity level of every configuration in a list.

    Each entry is passed through ``check_supervised_configuration`` and
    stored back at the same position. The input list itself is returned.
    """
    for position, entry in enumerate(configurations):
        configurations[position] = check_supervised_configuration(
            entry, verbose=verbose
        )
    return configurations
|
37
|
+
|
38
|
+
|
39
|
+
def flickr_job(thread=None):
    """Build the integration-test configuration for the YFCC100M dataset.

    ``thread`` is the thread count stored in the training arguments; when
    it is ``None`` the value of ``max_thread()`` is used. The returned dict
    holds the dataset name, the training and quantization arguments, and
    the expected metrics for the regular and the quantized model.
    """
    workers = max_thread() if thread is None else thread
    train_path = "YFCC100M/train"
    test_path = "YFCC100M/test"
    return {
        "dataset": "YFCC100M",
        "args": {
            "dim": 256,
            "wordNgrams": 2,
            "minCount": 10,
            "bucket": 10000000,
            "epoch": 20,
            "loss": "hs",
            "minCountLabel": 100,
            "thread": workers,
            "input": train_path,
        },
        "quant_args": {
            "dsub": 2,
            "lr": 0.1,
            "epoch": 5,
            "cutoff": 100000,
            "qnorm": True,
            "retrain": True,
            "qout": True,
            "input": train_path,
        },
        "test": {
            "n": 647224,
            "p1": 0.470,
            "r1": 0.071,
            "size": 12060039727,
            "data": test_path,
        },
        # One quant example (to illustrate slack): 0.344, 0.0528, 64506972
        "quant_test": {
            "n": 647224,
            "p1": 0.300,
            "r1": 0.0450,
            "size": 70000000,
            "data": test_path,
        },
    }
|
81
|
+
|
82
|
+
|
83
|
+
def langid_job1(thread=None):
    """Build the integration-test configuration for the langid dataset.

    ``thread`` is the thread count stored in the training arguments; when
    it is ``None`` the value of ``max_thread()`` is used. The quantized
    test reuses the example count and data file of the regular test.
    """
    workers = max_thread() if thread is None else thread
    train_path = "langid.train"
    expected = {
        "n": 10000,
        "p1": 0.985,
        "r1": 0.985,
        "size": 368132610,
        "data": "langid.valid",
    }
    # One quant example (to illustrate slack): 0.984 0.984 932793
    quant_expected = {
        "p1": 0.97,
        "r1": 0.97,
        "size": 1000000,
        "n": expected["n"],
        "data": expected["data"],
    }
    return {
        "dataset": "langid",
        "args": {
            "dim": 16,
            "minn": 2,
            "maxn": 4,
            "thread": workers,
            "input": train_path,
        },
        "quant_args": {
            "qnorm": True,
            "cutoff": 50000,
            "retrain": True,
            "input": train_path,
        },
        "test": expected,
        "quant_test": quant_expected,
    }
|
108
|
+
|
109
|
+
|
110
|
+
def langid_job2(thread=None):
    """Build the langid configuration with the loss set to ``"hs"``.

    Starts from the result of ``langid_job1`` for the same thread count
    (``max_thread()`` when ``thread`` is ``None``) and sets ``"loss"`` in
    its training arguments.
    """
    workers = max_thread() if thread is None else thread
    config = dict(langid_job1(workers))
    config["args"]["loss"] = "hs"
    return config
|
116
|
+
|
117
|
+
|
118
|
+
def cooking_job1(thread=None):
    """Build the integration-test configuration for the cooking dataset.

    ``thread`` is the thread count stored in the training arguments; when
    it is ``None`` the value of ``max_thread()`` is used. The quantized
    test reuses the example count and data file of the regular test.
    """
    workers = max_thread() if thread is None else thread
    train_path = "cooking.train"
    expected = {
        "n": 3000,
        "p1": 0.59,
        "r1": 0.25,
        "size": 804047585,
        "data": "cooking.valid",
    }
    # One quant example (to illustrate slack): 0.602 0.26 3439172
    quant_expected = {
        "p1": 0.55,
        "r1": 0.20,
        "size": 4000000,
        "n": expected["n"],
        "data": expected["data"],
    }
    return {
        "dataset": "cooking",
        "args": {
            "epoch": 25,
            "lr": 1.0,
            "wordNgrams": 2,
            "minCount": 1,
            "thread": workers,
            "input": train_path,
        },
        "quant_args": {
            "qnorm": True,
            "cutoff": 50000,
            "retrain": True,
            "input": train_path,
        },
        "test": expected,
        "quant_test": quant_expected,
    }
|
149
|
+
|
150
|
+
|
151
|
+
def cooking_job2(thread=None):
    """Build the cooking configuration with the loss set to ``"hs"``.

    Starts from the result of ``cooking_job1`` for the same thread count
    (``max_thread()`` when ``thread`` is ``None``) and sets ``"loss"`` in
    its training arguments.
    """
    workers = max_thread() if thread is None else thread
    config = dict(cooking_job1(workers))
    config["args"]["loss"] = "hs"
    return config
|
157
|
+
|
158
|
+
|
159
|
+
# Supervised models
|
160
|
+
# See https://fasttext.cc/docs/en/supervised-models.html
|
161
|
+
def get_supervised_models(thread=None, verbose=1):
    """Return the list of all supervised integration-test configurations.

    The list holds one configuration per tabulated dataset below, followed
    by the flickr, langid and cooking jobs. Every configuration gets the
    same ``thread`` count (``max_thread()`` when ``thread`` is ``None``)
    and has ``verbose`` applied to both its training and quantization
    arguments.
    """
    if thread is None:
        thread = max_thread()

    sup_params = {
        "dim": 10,
        "wordNgrams": 2,
        "minCount": 1,
        "bucket": 10000000,
        "epoch": 5,
        "thread": thread,
        "verbose": 1,
    }
    quant_params = {
        "retrain": True,
        "cutoff": 100000,
        "qnorm": True,
        "verbose": 1,
    }

    # One row per dataset:
    # (dataset, lr, n, p1, r1, size, quant p1, quant r1, quant size)
    sup_jobs = [
        ("ag_news", 0.25, 7600,
         0.915, 0.915, 405607193, 0.918, 0.918, 1600000),
        ("sogou_news", 0.5, 60000,
         0.968, 0.968, 421445471, 0.965, 0.965, 1500000),
        ("dbpedia", 0.5, 70000,
         0.983, 0.983, 447481878, 0.983, 0.983, 1700000),
        ("yelp_review_polarity", 0.1, 38000,
         0.956, 0.956, 427867393, 0.950, 0.950, 1600000),
        ("yelp_review_full", 0.1, 50000,
         0.638, 0.638, 431292576, 0.625, 0.625, 1600000),
        ("yahoo_answers", 0.1, 60000,
         0.723, 0.723, 517549567, 0.707, 0.707, 1700000),
        ("amazon_review_full", 0.05, 650000,
         0.600, 0.600, 483742593, 0.58, 0.58, 1600000),
        ("amazon_review_polarity", 0.05, 400000,
         0.940, 0.940, 493604598, 0.920, 0.920, 1600000),
    ]

    configurations = []
    for (dataset, lr, n, p1, r1, size,
         quant_p1, quant_r1, quant_size) in sup_jobs:
        train_path = dataset + ".train"
        test_path = dataset + ".test"

        args = sup_params.copy()
        args["lr"] = lr
        args["input"] = train_path

        quant_args = quant_params.copy()
        quant_args["lr"] = lr
        quant_args["input"] = train_path

        configurations.append({
            "dataset": dataset,
            "args": args,
            "quant_args": quant_args,
            "test": {
                "n": n,
                "p1": p1,
                "r1": r1,
                "size": size,
                "data": test_path,
            },
            "quant_test": {
                "n": n,
                "p1": quant_p1,
                "r1": quant_r1,
                "size": quant_size,
                "data": test_path,
            },
        })

    # Forward the requested thread count so these jobs honour it as well,
    # instead of always falling back to max_thread().
    extra_jobs = (
        flickr_job, langid_job1, langid_job2, cooking_job1, cooking_job2
    )
    for job in extra_jobs:
        configurations.append(job(thread))

    return check_supervised_configurations(configurations, verbose=verbose)
|