fasttext 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +251 -0
  5. data/ext/fasttext/ext.cpp +291 -0
  6. data/ext/fasttext/extconf.rb +15 -0
  7. data/lib/fasttext.rb +41 -0
  8. data/lib/fasttext/classifier.rb +92 -0
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/lib/fasttext/model.rb +60 -0
  11. data/lib/fasttext/vectorizer.rb +58 -0
  12. data/lib/fasttext/version.rb +3 -0
  13. data/vendor/fastText/CMakeLists.txt +68 -0
  14. data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
  15. data/vendor/fastText/CONTRIBUTING.md +32 -0
  16. data/vendor/fastText/LICENSE +21 -0
  17. data/vendor/fastText/MANIFEST.in +5 -0
  18. data/vendor/fastText/Makefile +63 -0
  19. data/vendor/fastText/README.md +339 -0
  20. data/vendor/fastText/alignment/README.md +53 -0
  21. data/vendor/fastText/alignment/align.py +145 -0
  22. data/vendor/fastText/alignment/eval.py +60 -0
  23. data/vendor/fastText/alignment/example.sh +51 -0
  24. data/vendor/fastText/alignment/unsup_align.py +109 -0
  25. data/vendor/fastText/alignment/utils.py +154 -0
  26. data/vendor/fastText/classification-example.sh +41 -0
  27. data/vendor/fastText/classification-results.sh +94 -0
  28. data/vendor/fastText/crawl/README.md +26 -0
  29. data/vendor/fastText/crawl/dedup.cc +51 -0
  30. data/vendor/fastText/crawl/download_crawl.sh +57 -0
  31. data/vendor/fastText/crawl/filter_dedup.sh +13 -0
  32. data/vendor/fastText/crawl/filter_utf8.cc +105 -0
  33. data/vendor/fastText/crawl/process_wet_file.sh +30 -0
  34. data/vendor/fastText/docs/aligned-vectors.md +64 -0
  35. data/vendor/fastText/docs/api.md +6 -0
  36. data/vendor/fastText/docs/cheatsheet.md +66 -0
  37. data/vendor/fastText/docs/crawl-vectors.md +125 -0
  38. data/vendor/fastText/docs/dataset.md +6 -0
  39. data/vendor/fastText/docs/english-vectors.md +53 -0
  40. data/vendor/fastText/docs/faqs.md +63 -0
  41. data/vendor/fastText/docs/language-identification.md +47 -0
  42. data/vendor/fastText/docs/options.md +50 -0
  43. data/vendor/fastText/docs/pretrained-vectors.md +142 -0
  44. data/vendor/fastText/docs/python-module.md +314 -0
  45. data/vendor/fastText/docs/references.md +41 -0
  46. data/vendor/fastText/docs/supervised-models.md +54 -0
  47. data/vendor/fastText/docs/supervised-tutorial.md +349 -0
  48. data/vendor/fastText/docs/support.md +58 -0
  49. data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
  50. data/vendor/fastText/eval.py +95 -0
  51. data/vendor/fastText/get-wikimedia.sh +79 -0
  52. data/vendor/fastText/python/README.md +322 -0
  53. data/vendor/fastText/python/README.rst +406 -0
  54. data/vendor/fastText/python/benchmarks/README.rst +3 -0
  55. data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
  56. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
  57. data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
  58. data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
  59. data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
  60. data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
  61. data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
  62. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
  63. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
  64. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
  65. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
  66. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
  67. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
  68. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
  69. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
  70. data/vendor/fastText/quantization-example.sh +40 -0
  71. data/vendor/fastText/runtests.py +60 -0
  72. data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
  73. data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
  74. data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
  75. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
  76. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
  77. data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
  78. data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
  79. data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
  80. data/vendor/fastText/setup.cfg +2 -0
  81. data/vendor/fastText/setup.py +203 -0
  82. data/vendor/fastText/src/args.cc +320 -0
  83. data/vendor/fastText/src/args.h +68 -0
  84. data/vendor/fastText/src/densematrix.cc +155 -0
  85. data/vendor/fastText/src/densematrix.h +75 -0
  86. data/vendor/fastText/src/dictionary.cc +540 -0
  87. data/vendor/fastText/src/dictionary.h +111 -0
  88. data/vendor/fastText/src/fasttext.cc +821 -0
  89. data/vendor/fastText/src/fasttext.h +191 -0
  90. data/vendor/fastText/src/loss.cc +346 -0
  91. data/vendor/fastText/src/loss.h +163 -0
  92. data/vendor/fastText/src/main.cc +435 -0
  93. data/vendor/fastText/src/matrix.cc +25 -0
  94. data/vendor/fastText/src/matrix.h +44 -0
  95. data/vendor/fastText/src/meter.cc +68 -0
  96. data/vendor/fastText/src/meter.h +69 -0
  97. data/vendor/fastText/src/model.cc +98 -0
  98. data/vendor/fastText/src/model.h +79 -0
  99. data/vendor/fastText/src/productquantizer.cc +251 -0
  100. data/vendor/fastText/src/productquantizer.h +63 -0
  101. data/vendor/fastText/src/quantmatrix.cc +117 -0
  102. data/vendor/fastText/src/quantmatrix.h +60 -0
  103. data/vendor/fastText/src/real.h +15 -0
  104. data/vendor/fastText/src/utils.cc +28 -0
  105. data/vendor/fastText/src/utils.h +43 -0
  106. data/vendor/fastText/src/vector.cc +97 -0
  107. data/vendor/fastText/src/vector.h +61 -0
  108. data/vendor/fastText/tests/fetch_test_data.sh +202 -0
  109. data/vendor/fastText/website/README.md +6 -0
  110. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
  111. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
  112. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
  113. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
  114. data/vendor/fastText/website/core/Footer.js +127 -0
  115. data/vendor/fastText/website/package.json +12 -0
  116. data/vendor/fastText/website/pages/en/index.js +286 -0
  117. data/vendor/fastText/website/sidebars.json +18 -0
  118. data/vendor/fastText/website/siteConfig.js +102 -0
  119. data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
  120. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
  121. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
  122. data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
  123. data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
  124. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
  125. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  126. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  127. data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
  141. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
  142. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
  143. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
  144. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
  145. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
  146. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
  147. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
  148. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
  149. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
  150. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
  151. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
  152. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  153. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
  154. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
  155. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
  156. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
  157. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
  158. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
  159. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
  161. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  162. data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
  163. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
  164. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
  165. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
  166. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
  167. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  168. data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
  169. data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
  170. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  171. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  172. data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
  173. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
  174. data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
  175. data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
  176. data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
  177. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
  178. data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
  179. data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
  180. data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
  181. data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
  182. data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
  183. data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
  184. data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
  185. data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
  186. data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
  187. data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
  188. data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
  189. data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
  190. data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
  191. data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
  192. data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
  193. data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
  194. data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
  195. data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
  196. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
  197. data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
  198. data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
  199. data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
  200. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
  201. data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
  202. data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
  203. data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
  204. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
  205. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
  206. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
  207. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
  208. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
  209. data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
  210. data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
  211. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
  212. data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
  213. data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
  214. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
  215. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
  216. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
  217. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
  218. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
  219. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
  220. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
  221. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
  222. data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
  223. data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
  224. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  225. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  226. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  227. data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
  228. data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
  229. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
  230. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
  231. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
  232. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  233. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
  234. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
  235. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
  236. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
  237. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
  238. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
  239. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
  240. data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
  241. data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
  242. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
  243. data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
  244. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
  245. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
  246. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
  247. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
  248. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
  249. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
  250. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
  251. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
  252. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
  253. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
  254. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
  255. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
  256. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
  257. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
  258. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
  259. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
  260. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
  261. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
  262. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
  263. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
  264. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
  265. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
  266. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
  267. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
  268. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
  269. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
  270. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
  271. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
  272. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
  273. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
  274. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
  275. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
  276. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
  277. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
  278. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
  279. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
  281. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
  282. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
  283. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
  284. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
  285. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
  286. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
  287. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
  288. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
  289. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
  290. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
  291. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
  299. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
  301. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
  302. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
  303. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
  304. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
  305. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
  306. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
  307. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
  308. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
  309. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
  310. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  311. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
  312. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
  313. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
  314. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
  315. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
  316. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
  317. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
  318. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
  319. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
  320. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
  321. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
  322. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
  323. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
  324. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
  326. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
  327. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
  328. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
  329. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
  330. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
  331. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
  332. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
  333. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
  334. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
  335. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
  336. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
  337. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
  338. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
  339. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
  340. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
  341. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
  342. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
  343. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
  344. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
  345. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
  346. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
  347. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
  348. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
  349. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
  350. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
  351. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
  352. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
  353. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
  354. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
  392. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
  394. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
  395. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
  396. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
  397. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
  398. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
  402. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
  403. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  404. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
  405. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
  406. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
  407. data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
  408. data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
  409. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  410. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  411. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  412. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
  413. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
  414. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
  415. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
  416. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
  446. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
  447. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
  448. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
  449. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
  450. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
  451. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
  452. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
  453. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
  454. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
  455. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
  456. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
  457. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
  459. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
  460. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
  461. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
  462. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
  463. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
  464. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  465. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  466. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  467. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  468. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  469. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  470. data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
  471. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
  472. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
  473. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
  474. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
  475. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
  476. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
  477. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
  478. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
  479. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
  480. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
  481. data/vendor/fastText/website/static/fasttext.css +48 -0
  482. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  483. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  484. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  485. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  486. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  487. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  488. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  489. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  490. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  491. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  492. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  493. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  494. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  495. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  496. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  497. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  498. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  499. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  500. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  501. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  502. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  503. data/vendor/fastText/website/static/img/model-black.png +0 -0
  504. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  505. data/vendor/fastText/website/static/img/model-red.png +0 -0
  506. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  507. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  508. data/vendor/fastText/wikifil.pl +57 -0
  509. data/vendor/fastText/word-vector-example.sh +39 -0
  510. metadata +621 -0
@@ -0,0 +1,339 @@
1
+ # fastText
2
+ [fastText](https://fasttext.cc/) is a library for efficient learning of word representations and sentence classification.
3
+
4
+ [![CircleCI](https://circleci.com/gh/facebookresearch/fastText/tree/master.svg?style=svg)](https://circleci.com/gh/facebookresearch/fastText/tree/master)
5
+
6
+ ## Table of contents
7
+
8
+ * [Resources](#resources)
9
+ * [Models](#models)
10
+ * [Supplementary data](#supplementary-data)
11
+ * [FAQ](#faq)
12
+ * [Cheatsheet](#cheatsheet)
13
+ * [Requirements](#requirements)
14
+ * [Building fastText](#building-fasttext)
15
+ * [Getting the source code](#getting-the-source-code)
16
+ * [Building fastText using make (preferred)](#building-fasttext-using-make-preferred)
17
+ * [Building fastText using cmake](#building-fasttext-using-cmake)
18
+ * [Building fastText for Python](#building-fasttext-for-python)
19
+ * [Example use cases](#example-use-cases)
20
+ * [Word representation learning](#word-representation-learning)
21
+ * [Obtaining word vectors for out-of-vocabulary words](#obtaining-word-vectors-for-out-of-vocabulary-words)
22
+ * [Text classification](#text-classification)
23
+ * [Full documentation](#full-documentation)
24
+ * [References](#references)
25
+ * [Enriching Word Vectors with Subword Information](#enriching-word-vectors-with-subword-information)
26
+ * [Bag of Tricks for Efficient Text Classification](#bag-of-tricks-for-efficient-text-classification)
27
+ * [FastText.zip: Compressing text classification models](#fasttextzip-compressing-text-classification-models)
28
+ * [Join the fastText community](#join-the-fasttext-community)
29
+ * [License](#license)
30
+
31
+ ## Resources
32
+
33
+ ### Models
34
+ - Recent state-of-the-art [English word vectors](https://fasttext.cc/docs/en/english-vectors.html).
35
+ - Word vectors for [157 languages trained on Wikipedia and Crawl](https://github.com/facebookresearch/fastText/blob/master/docs/crawl-vectors.md).
36
+ - Models for [language identification](https://fasttext.cc/docs/en/language-identification.html#content) and [various supervised tasks](https://fasttext.cc/docs/en/supervised-models.html#content).
37
+
38
+ ### Supplementary data
39
+ - The preprocessed [YFCC100M data](https://fasttext.cc/docs/en/dataset.html#content) used in [2].
40
+
41
+ ### FAQ
42
+
43
+ You can find [answers to frequently asked questions](https://fasttext.cc/docs/en/faqs.html#content) on our [website](https://fasttext.cc/).
44
+
45
+ ### Cheatsheet
46
+
47
+ We also provide a [cheatsheet](https://fasttext.cc/docs/en/cheatsheet.html#content) full of useful one-liners.
48
+
49
+ ## Requirements
50
+
51
+ We are continuously building and testing our library, CLI and Python bindings under various docker images using [circleci](https://circleci.com/).
52
+
53
+ Generally, **fastText** builds on modern Mac OS and Linux distributions.
54
+ Since it uses some C++11 features, it requires a compiler with good C++11 support.
55
+ These include :
56
+
57
+ * (g++-4.7.2 or newer) or (clang-3.3 or newer)
58
+
59
+ Compilation is carried out using a Makefile, so you will need to have a working **make**.
60
+ If you want to use **cmake** you need at least version 2.8.9.
61
+
62
+ One of the oldest distributions we successfully built and tested the CLI under is [Debian jessie](https://www.debian.org/releases/jessie/).
63
+
64
+ For the word-similarity evaluation script you will need:
65
+
66
+ * Python 2.6 or newer
67
+ * NumPy & SciPy
68
+
69
+ For the python bindings (see the subdirectory python) you will need:
70
+
71
+ * Python version 2.7 or >=3.4
72
+ * NumPy & SciPy
73
+ * [pybind11](https://github.com/pybind/pybind11)
74
+
75
+ One of the oldest distributions we successfully built and tested the Python bindings under is [Debian jessie](https://www.debian.org/releases/jessie/).
76
+
77
+ If these requirements make it impossible for you to use fastText, please open an issue and we will try to accommodate you.
78
+
79
+ ## Building fastText
80
+
81
+ We discuss building the latest stable version of fastText.
82
+
83
+ ### Getting the source code
84
+
85
+ You can find our [latest stable release](https://github.com/facebookresearch/fastText/releases/latest) in the usual place.
86
+
87
+ There is also the master branch that contains all of our most recent work, but comes along with all the usual caveats of an unstable branch. You might want to use this if you are a developer or power-user.
88
+
89
+ ### Building fastText using make (preferred)
90
+
91
+ ```
92
+ $ wget https://github.com/facebookresearch/fastText/archive/v0.9.1.zip
93
+ $ unzip v0.9.1.zip
94
+ $ cd fastText-0.9.1
95
+ $ make
96
+ ```
97
+
98
+ This will produce object files for all the classes as well as the main binary `fasttext`.
99
+ If you do not plan on using the default system-wide compiler, update the two macros defined at the beginning of the Makefile (CC and INCLUDES).
100
+
101
+ ### Building fastText using cmake
102
+
103
+ For now this is not part of a release, so you will need to clone the master branch.
104
+
105
+ ```
106
+ $ git clone https://github.com/facebookresearch/fastText.git
107
+ $ cd fastText
108
+ $ mkdir build && cd build && cmake ..
109
+ $ make && make install
110
+ ```
111
+
112
+ This will create the fasttext binary and also all relevant libraries (shared, static, PIC).
113
+
114
+ ### Building fastText for Python
115
+
116
+ For now this is not part of a release, so you will need to clone the master branch.
117
+
118
+ ```
119
+ $ git clone https://github.com/facebookresearch/fastText.git
120
+ $ cd fastText
121
+ $ pip install .
122
+ ```
123
+
124
+ For further information and introduction see python/README.md
125
+
126
+ ## Example use cases
127
+
128
+ This library has two main use cases: word representation learning and text classification.
129
+ These were described in the two papers [1](#enriching-word-vectors-with-subword-information) and [2](#bag-of-tricks-for-efficient-text-classification).
130
+
131
+ ### Word representation learning
132
+
133
+ In order to learn word vectors, as described in [1](#enriching-word-vectors-with-subword-information), do:
134
+
135
+ ```
136
+ $ ./fasttext skipgram -input data.txt -output model
137
+ ```
138
+
139
+ where `data.txt` is a training file containing `UTF-8` encoded text.
140
+ By default the word vectors will take into account character n-grams from 3 to 6 characters.
141
+ At the end of optimization the program will save two files: `model.bin` and `model.vec`.
142
+ `model.vec` is a text file containing the word vectors, one per line.
143
+ `model.bin` is a binary file containing the parameters of the model along with the dictionary and all hyper parameters.
144
+ The binary file can be used later to compute word vectors or to restart the optimization.
145
+
146
+ ### Obtaining word vectors for out-of-vocabulary words
147
+
148
+ The previously trained model can be used to compute word vectors for out-of-vocabulary words.
149
+ Provided you have a text file `queries.txt` containing words for which you want to compute vectors, use the following command:
150
+
151
+ ```
152
+ $ ./fasttext print-word-vectors model.bin < queries.txt
153
+ ```
154
+
155
+ This will output word vectors to the standard output, one vector per line.
156
+ This can also be used with pipes:
157
+
158
+ ```
159
+ $ cat queries.txt | ./fasttext print-word-vectors model.bin
160
+ ```
161
+
162
+ See the provided scripts for an example. For instance, running:
163
+
164
+ ```
165
+ $ ./word-vector-example.sh
166
+ ```
167
+
168
+ will compile the code, download data, compute word vectors and evaluate them on the rare words similarity dataset RW [Thang et al. 2013].
169
+
170
+ ### Text classification
171
+
172
+ This library can also be used to train supervised text classifiers, for instance for sentiment analysis.
173
+ In order to train a text classifier using the method described in [2](#bag-of-tricks-for-efficient-text-classification), use:
174
+
175
+ ```
176
+ $ ./fasttext supervised -input train.txt -output model
177
+ ```
178
+
179
+ where `train.txt` is a text file containing a training sentence per line along with the labels.
180
+ By default, we assume that labels are words that are prefixed by the string `__label__`.
181
+ This will output two files: `model.bin` and `model.vec`.
182
+ Once the model was trained, you can evaluate it by computing the precision and recall at k (P@k and R@k) on a test set using:
183
+
184
+ ```
185
+ $ ./fasttext test model.bin test.txt k
186
+ ```
187
+
188
+ The argument `k` is optional, and is equal to `1` by default.
189
+
190
+ In order to obtain the k most likely labels for a piece of text, use:
191
+
192
+ ```
193
+ $ ./fasttext predict model.bin test.txt k
194
+ ```
195
+
196
+ or use `predict-prob` to also get the probability for each label
197
+
198
+ ```
199
+ $ ./fasttext predict-prob model.bin test.txt k
200
+ ```
201
+
202
+ where `test.txt` contains a piece of text to classify per line.
203
+ Doing so will print to the standard output the k most likely labels for each line.
204
+ The argument `k` is optional, and equal to `1` by default.
205
+ See `classification-example.sh` for an example use case.
206
+ In order to reproduce results from the paper [2](#bag-of-tricks-for-efficient-text-classification), run `classification-results.sh`, this will download all the datasets and reproduce the results from Table 1.
207
+
208
+ If you want to compute vector representations of sentences or paragraphs, please use:
209
+
210
+ ```
211
+ $ ./fasttext print-sentence-vectors model.bin < text.txt
212
+ ```
213
+
214
+ This assumes that the `text.txt` file contains the paragraphs that you want to get vectors for.
215
+ The program will output one vector representation per line in the file.
216
+
217
+ You can also quantize a supervised model to reduce its memory usage with the following command:
218
+
219
+ ```
220
+ $ ./fasttext quantize -output model
221
+ ```
222
+ This will create a `.ftz` file with a smaller memory footprint. All the standard functionality, like `test` or `predict` work the same way on the quantized models:
223
+ ```
224
+ $ ./fasttext test model.ftz test.txt
225
+ ```
226
+ The quantization procedure follows the steps described in [3](#fasttextzip-compressing-text-classification-models). You can
227
+ run the script `quantization-example.sh` for an example.
228
+
229
+
230
+ ## Full documentation
231
+
232
+ Invoke a command without arguments to list available arguments and their default values:
233
+
234
+ ```
235
+ $ ./fasttext supervised
236
+ Empty input or output path.
237
+
238
+ The following arguments are mandatory:
239
+ -input training file path
240
+ -output output file path
241
+
242
+ The following arguments are optional:
243
+ -verbose verbosity level [2]
244
+
245
+ The following arguments for the dictionary are optional:
246
+ -minCount minimal number of word occurrences [1]
247
+ -minCountLabel minimal number of label occurrences [0]
248
+ -wordNgrams max length of word ngram [1]
249
+ -bucket number of buckets [2000000]
250
+ -minn min length of char ngram [0]
251
+ -maxn max length of char ngram [0]
252
+ -t sampling threshold [0.0001]
253
+ -label labels prefix [__label__]
254
+
255
+ The following arguments for training are optional:
256
+ -lr learning rate [0.1]
257
+ -lrUpdateRate change the rate of updates for the learning rate [100]
258
+ -dim size of word vectors [100]
259
+ -ws size of the context window [5]
260
+ -epoch number of epochs [5]
261
+ -neg number of negatives sampled [5]
262
+ -loss loss function {ns, hs, softmax} [softmax]
263
+ -thread number of threads [12]
264
+ -pretrainedVectors pretrained word vectors for supervised learning []
265
+ -saveOutput whether output params should be saved [0]
266
+
267
+ The following arguments for quantization are optional:
268
+ -cutoff number of words and ngrams to retain [0]
269
+ -retrain finetune embeddings if a cutoff is applied [0]
270
+ -qnorm quantizing the norm separately [0]
271
+ -qout quantizing the classifier [0]
272
+ -dsub size of each sub-vector [2]
273
+ ```
274
+
275
+ Defaults may vary by mode. (Word-representation modes `skipgram` and `cbow` use a default `-minCount` of 5.)
276
+
277
+ ## References
278
+
279
+ Please cite [1](#enriching-word-vectors-with-subword-information) if using this code for learning word representations or [2](#bag-of-tricks-for-efficient-text-classification) if using for text classification.
280
+
281
+ ### Enriching Word Vectors with Subword Information
282
+
283
+ [1] P. Bojanowski\*, E. Grave\*, A. Joulin, T. Mikolov, [*Enriching Word Vectors with Subword Information*](https://arxiv.org/abs/1607.04606)
284
+
285
+ ```
286
+ @article{bojanowski2017enriching,
287
+ title={Enriching Word Vectors with Subword Information},
288
+ author={Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
289
+ journal={Transactions of the Association for Computational Linguistics},
290
+ volume={5},
291
+ year={2017},
292
+ issn={2307-387X},
293
+ pages={135--146}
294
+ }
295
+ ```
296
+
297
+ ### Bag of Tricks for Efficient Text Classification
298
+
299
+ [2] A. Joulin, E. Grave, P. Bojanowski, T. Mikolov, [*Bag of Tricks for Efficient Text Classification*](https://arxiv.org/abs/1607.01759)
300
+
301
+ ```
302
+ @InProceedings{joulin2017bag,
303
+ title={Bag of Tricks for Efficient Text Classification},
304
+ author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Mikolov, Tomas},
305
+ booktitle={Proceedings of the 15th Conference of the European Chapter of the Association for Computational Linguistics: Volume 2, Short Papers},
306
+ month={April},
307
+ year={2017},
308
+ publisher={Association for Computational Linguistics},
309
+ pages={427--431},
310
+ }
311
+ ```
312
+
313
+ ### FastText.zip: Compressing text classification models
314
+
315
+ [3] A. Joulin, E. Grave, P. Bojanowski, M. Douze, H. Jégou, T. Mikolov, [*FastText.zip: Compressing text classification models*](https://arxiv.org/abs/1612.03651)
316
+
317
+ ```
318
+ @article{joulin2016fasttext,
319
+ title={FastText.zip: Compressing text classification models},
320
+ author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Douze, Matthijs and J{\'e}gou, H{\'e}rve and Mikolov, Tomas},
321
+ journal={arXiv preprint arXiv:1612.03651},
322
+ year={2016}
323
+ }
324
+ ```
325
+
326
+ (\* These authors contributed equally.)
327
+
328
+
329
+ ## Join the fastText community
330
+
331
+ * Facebook page: https://www.facebook.com/groups/1174547215919768
332
+ * Google group: https://groups.google.com/forum/#!forum/fasttext-library
333
+ * Contact: [egrave@fb.com](mailto:egrave@fb.com), [bojanowski@fb.com](mailto:bojanowski@fb.com), [ajoulin@fb.com](mailto:ajoulin@fb.com), [tmikolov@fb.com](mailto:tmikolov@fb.com)
334
+
335
+ See the CONTRIBUTING file for information about how to help out.
336
+
337
+ ## License
338
+
339
+ fastText is MIT-licensed.
@@ -0,0 +1,53 @@
1
+ ## Alignment of Word Embeddings
2
+
3
+ This directory provides code for learning alignments between word embeddings in different languages.
4
+
5
+ The code is in Python 3 and requires [NumPy](http://www.numpy.org/).
6
+
7
+ The script `example.sh` shows how to use this code to learn and evaluate a bilingual alignment of word embeddings.
8
+
9
+ The word embeddings used in [1] can be found on the [fastText project page](https://fasttext.cc) and the supervised bilingual lexicons on the [MUSE project page](https://github.com/facebookresearch/MUSE).
10
+
11
+ ### Supervised alignment
12
+
13
+ The script `align.py` aligns word embeddings from two languages using a bilingual lexicon as supervision.
14
+ The details of this approach can be found in [1].
15
+
16
+ ### Unsupervised alignment
17
+
18
+ The script `unsup_align.py` aligns word embeddings from two languages without requiring any supervision.
19
+ The details of this approach can be found in [2].
20
+
21
+ In addition to NumPy, the unsupervised method requires the [Python Optimal Transport](https://pot.readthedocs.io/en/stable/) toolbox.
22
+
23
+ ### Download
24
+
25
+ Wikipedia fastText embeddings aligned with our method can be found [here](https://fasttext.cc/docs/en/aligned-vectors.html).
26
+
27
+ ### References
28
+
29
+ If you use the supervised alignment method, please cite:
30
+
31
+ [1] A. Joulin, P. Bojanowski, T. Mikolov, H. Jegou, E. Grave, [*Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion*](https://arxiv.org/abs/1804.07745)
32
+
33
+ ```
34
+ @InProceedings{joulin2018loss,
35
+ title={Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion},
36
+ author={Joulin, Armand and Bojanowski, Piotr and Mikolov, Tomas and J\'egou, Herv\'e and Grave, Edouard},
37
+ year={2018},
38
+ booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
39
+ }
40
+ ```
41
+
42
+ If you use the unsupervised alignment method, please cite:
43
+
44
+ [2] E. Grave, A. Joulin, Q. Berthet, [*Unsupervised Alignment of Embeddings with Wasserstein Procrustes*](https://arxiv.org/abs/1805.11222)
45
+
46
+ ```
47
+ @article{grave2018unsupervised,
48
+ title={Unsupervised Alignment of Embeddings with Wasserstein Procrustes},
49
+ author={Grave, Edouard and Joulin, Armand and Berthet, Quentin},
50
+ journal={arXiv preprint arXiv:1805.11222},
51
+ year={2018}
52
+ }
53
+ ```
@@ -0,0 +1,145 @@
1
+ #!/usr/bin/env python3
2
+ # -*- coding: utf-8 -*-
3
+ #
4
+ # Copyright (c) 2018-present, Facebook, Inc.
5
+ # All rights reserved.
6
+ #
7
+ # This source code is licensed under the license found in the
8
+ # LICENSE file in the root directory of this source tree.
9
+
10
+ import numpy as np
11
+ import argparse
12
+ from utils import *
13
+ import sys
14
+
15
+ parser = argparse.ArgumentParser(description='RCSLS for supervised word alignment')
16
+
17
+ parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
18
+ parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
19
+ parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
20
+
21
+ parser.add_argument("--dico_train", type=str, default='', help="train dictionary")
22
+ parser.add_argument("--dico_test", type=str, default='', help="validation dictionary")
23
+
24
+ parser.add_argument("--output", type=str, default='', help="where to save aligned embeddings")
25
+
26
+ parser.add_argument("--knn", type=int, default=10, help="number of nearest neighbors in RCSL/CSLS")
27
+ parser.add_argument("--maxneg", type=int, default=200000, help="Maximum number of negatives for the Extended RCSLS")
28
+ parser.add_argument("--maxsup", type=int, default=-1, help="Maximum number of training examples")
29
+ parser.add_argument("--maxload", type=int, default=200000, help="Maximum number of loaded vectors")
30
+
31
+ parser.add_argument("--model", type=str, default="none", help="Set of constraints: spectral or none")
32
+ parser.add_argument("--reg", type=float, default=0.0 , help='regularization parameters')
33
+
34
+ parser.add_argument("--lr", type=float, default=1.0, help='learning rate')
35
+ parser.add_argument("--niter", type=int, default=10, help='number of iterations')
36
+ parser.add_argument('--sgd', action='store_true', help='use sgd')
37
+ parser.add_argument("--batchsize", type=int, default=10000, help="batch size for sgd")
38
+
39
+ params = parser.parse_args()
40
+
41
+ ###### SPECIFIC FUNCTIONS ######
42
+ # functions specific to RCSLS
43
+ # the rest of the functions are in utils.py
44
+
45
+ def getknn(sc, x, y, k=10):
46
+ sidx = np.argpartition(sc, -k, axis=1)[:, -k:]
47
+ ytopk = y[sidx.flatten(), :]
48
+ ytopk = ytopk.reshape(sidx.shape[0], sidx.shape[1], y.shape[1])
49
+ f = np.sum(sc[np.arange(sc.shape[0])[:, None], sidx])
50
+ df = np.dot(ytopk.sum(1).T, x)
51
+ return f / k, df / k
52
+
53
+
54
+ def rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, knn=10):
55
+ X_trans = np.dot(X_src, R.T)
56
+ f = 2 * np.sum(X_trans * Y_tgt)
57
+ df = 2 * np.dot(Y_tgt.T, X_src)
58
+ fk0, dfk0 = getknn(np.dot(X_trans, Z_tgt.T), X_src, Z_tgt, knn)
59
+ fk1, dfk1 = getknn(np.dot(np.dot(Z_src, R.T), Y_tgt.T).T, Y_tgt, Z_src, knn)
60
+ f = f - fk0 -fk1
61
+ df = df - dfk0 - dfk1.T
62
+ return -f / X_src.shape[0], -df / X_src.shape[0]
63
+
64
+
65
+ def proj_spectral(R):
66
+ U, s, V = np.linalg.svd(R)
67
+ s[s > 1] = 1
68
+ s[s < 0] = 0
69
+ return np.dot(U, np.dot(np.diag(s), V))
70
+
71
+
72
+ ###### MAIN ######
73
+
74
+ # load word embeddings
75
+ words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
76
+ words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
77
+
78
+ # load validation bilingual lexicon
79
+ src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
80
+
81
+ # word --> vector indices
82
+ idx_src = idx(words_src)
83
+ idx_tgt = idx(words_tgt)
84
+
85
+ # load train bilingual lexicon
86
+ pairs = load_pairs(params.dico_train, idx_src, idx_tgt)
87
+ if params.maxsup > 0 and params.maxsup < len(pairs):
88
+ pairs = pairs[:params.maxsup]
89
+
90
+ # selecting training vector pairs
91
+ X_src, Y_tgt = select_vectors_from_pairs(x_src, x_tgt, pairs)
92
+
93
+ # adding negatives for RCSLS
94
+ Z_src = x_src[:params.maxneg, :]
95
+ Z_tgt = x_tgt[:params.maxneg, :]
96
+
97
+ # initialization:
98
+ R = procrustes(X_src, Y_tgt)
99
+ nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
100
+ print("[init -- Procrustes] NN: %.4f"%(nnacc))
101
+ sys.stdout.flush()
102
+
103
+ # optimization
104
+ fold, Rold = 0, []
105
+ niter, lr = params.niter, params.lr
106
+
107
+ for it in range(0, niter + 1):
108
+ if lr < 1e-4:
109
+ break
110
+
111
+ if params.sgd:
112
+ indices = np.random.choice(X_src.shape[0], size=params.batchsize, replace=False)
113
+ f, df = rcsls(X_src[indices, :], Y_tgt[indices, :], Z_src, Z_tgt, R, params.knn)
114
+ else:
115
+ f, df = rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, params.knn)
116
+
117
+ if params.reg > 0:
118
+ R *= (1 - lr * params.reg)
119
+ R -= lr * df
120
+ if params.model == "spectral":
121
+ R = proj_spectral(R)
122
+
123
+ print("[it=%d] f = %.4f" % (it, f))
124
+ sys.stdout.flush()
125
+
126
+ if f > fold and it > 0 and not params.sgd:
127
+ lr /= 2
128
+ f, R = fold, Rold
129
+
130
+ fold, Rold = f, R
131
+
132
+ if (it > 0 and it % 10 == 0) or it == niter:
133
+ nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
134
+ print("[it=%d] NN = %.4f - Coverage = %.4f" % (it, nnacc, len(src2tgt) / lexicon_size))
135
+
136
+ nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
137
+ print("[final] NN = %.4f - Coverage = %.4f" % (nnacc, len(src2tgt) / lexicon_size))
138
+
139
+ if params.output != "":
140
+ print("Saving all aligned vectors at %s" % params.output)
141
+ words_full, x_full = load_vectors(params.src_emb, maxload=-1, center=params.center, verbose=False)
142
+ x = np.dot(x_full, R.T)
143
+ x /= np.linalg.norm(x, axis=1)[:, np.newaxis] + 1e-8
144
+ save_vectors(params.output, x, words_full)
145
+ save_matrix(params.output + "-mat", R)