fasttext 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (498) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,6 +0,0 @@
1
- Prerequisites
2
- - nodejs
3
-
4
- To build locally, navigate into subfolder website and execute
5
- - npm install
6
- - npm run start
@@ -1,42 +0,0 @@
1
- ---
2
- title: Releasing fastText
3
- author: Edouard Grave
4
- authorURL: https://research.fb.com/people/grave-edouard/
5
- authorFBID: 534178442
6
- ---
7
-
8
- ## Faster, better text classification!
9
-
10
- Understanding the meaning of words that roll off your tongue as you talk, or your fingertips as you tap out posts is one of the biggest technical challenges facing artificial intelligence researchers. But it is an essential need. Automatic text processing forms a key part of the day-to-day interaction with your computer; it’s a critical component of everything from web search and content ranking to spam filtering, and when it works well, it’s completely invisible to you. With the growing amount of online data, there is a need for more flexible tools to better understand the content of very large datasets, in order to provide more accurate classification results.
11
-
12
- To address this need, the [Facebook AI Research (FAIR) lab](https://research.fb.com/category/facebook-ai-research-fair/) is open-sourcing [fastText](https://github.com/facebookresearch/fastText), a library designed to help build scalable solutions for text representation and classification. Our ongoing commitment to collaboration and sharing with the community extends beyond just delivering code. We know it’s important to share our learnings to advance the field, so have also [published](http://arxiv.org/abs/1607.04606) [our research](http://arxiv.org/abs/1607.01759) relating to fastText.
13
-
14
- FastText combines some of the most successful concepts introduced by the natural language processing and machine learning communities in the last few decades. These include representing sentences with bag of words and bag of n-grams, as well as using subword information, and sharing information across classes through a hidden representation. We also employ a hierachical softmax that takes advantage of the unbalanced distribution of the classes to speed up computation. These different concepts are being used for two different tasks: efficient text classification and learning word vector representations.
15
-
16
- <!--truncate-->
17
-
18
- ## Efficient learning for text classification
19
-
20
- Deep neural networks have recently become very popular for text processing. While these models achieve very good performance in limited laboratory practice, they can be slow to train and test, which limits their use on very large datasets.
21
-
22
- FastText helps solve this problem. To be efficient on datasets with very large number of categories, it uses a hierarchical classifier instead of a flat structure, in which the different categories are organized in a tree (think binary tree instead of list). This reduces the time complexities of training and testing text classifiers from linear to logarithmic with respect to the number of classes. FastText also exploits the fact that classes are imbalanced (some classes appearing more often than other) by using the Huffman algorithm to build the tree used to represent categories. The depth in the tree of very frequent categories is therefore smaller than for infrequent ones, leading to further computational efficiency.
23
-
24
- FastText also represents a text by a low dimensional vector, which is obtained by summing vectors corresponding to the words appearing in the text. In fastText, a low dimensional vector is associated to each word of the vocabulary. This hidden representation is shared across all classifiers for different categories, allowing information about words learned for one category to be used by other categories. These kind of representations, called bag of words, ignore word order. In fastText we also use vectors to represent word ngrams to take into account local word order, which is important for many text classification problems.
25
-
26
- Our experiments show that fastText is often on par with deep learning classifiers in terms of accuracy, and many orders of magnitude faster for training and evaluation. With fastText, we were often able to cut training times from several days to just a few seconds, and achieve state-of-the-art performance on many standard problems, such as sentiment analysis or tag prediction.
27
-
28
- ![fastText performance](../../../../img/blog/2016-08-18-blog-post-img1.png)
29
- _Comparison between fastText and deep learning-based methods._
30
-
31
- ## A dedicated tool
32
-
33
- Text classification is very important in the commercial world; spam or clickbait filtering being perhaps the most ubiquitous example. There are tools that design models for general classification problems (such as Vowpal Wabbit or libSVM), but fastText is exclusively dedicated to text classification. This allows it to be quickly trained on extremely large datasets. We have seen results of models trained on more than 1 billion words in less than 10 minutes using a standard multicore CPU. FastText can also classify a half-million sentences among more than 300,000 categories in less than five minutes.
34
-
35
- ## Works on many languages
36
-
37
- Besides text classification, fastText can also be used to learn vector representations of words. It has been designed to work on a variety of languages, including English, German, Spanish, French, and Czech, by taking advantage of the languages morphological structure. It uses a simple yet effective way of incorporating subword information that turns out to work very well for morphologically rich languages like Czech, demonstrating that carefully designed character ngram features are strong source of information to enrich the word representations. FastText can achieve significantly better performance than the popular [word2vec](https://code.google.com/archive/p/word2vec/) tool, or other state-of-the-art morphological word representations.
38
-
39
- ![fastText performance](../../../../img/blog/2016-08-18-blog-post-img2.png)
40
- _Comparison between fastText and state-of-the-art word representations for different languages._
41
-
42
- We hope the introduction of fastText helps the community build better, more scalable solutions for text representation and classification. Delivered as an open-source library, we believe fastText is a valuable addition to the research and engineering communities, which will ultimately help us all design better applications and further advances in language understanding.
@@ -1,60 +0,0 @@
1
- ---
2
- title: fastText on mobile
3
- author: Armand Joulin
4
- authorURL: https://research.fb.com/people/joulin-armand/
5
- authorFBID: 696297201
6
- ---
7
-
8
- Today, the Facebook AI Research (FAIR) team released pre-trained vectors in 294 languages, accompanied by two quick-start tutorials, to increase fastText’s accessibility to the large community of students, software developers, and researchers interested in machine learning. fastText’s models now fit on smartphones and small computers like Raspberry Pi devices thanks to a new functionality that reduces memory usage.
9
-
10
- First open-sourced last summer, [fastText](https://github.com/facebookresearch/fastText) was designed to be accessible to anyone with generic hardware like notebooks and X86 cloud instances, or almost any platform with enough memory. Smartphone and small computer support extend fastText’s accessibility to an even larger community and a greater range of applications.
11
-
12
- <!--truncate-->
13
-
14
- ### fastText on small memory devices
15
-
16
- To reach more people and more applications via mobile phones and other internet-connected devices, this release contains a new functionality that reduces the memory consumed by fastText models. The typical model built on earlier versions use a few gigabytes of memory; this new feature helps to reduce memory to as little as a few hundred kilobytes.
17
-
18
- Squeezing models into reduced memory footprints was made possible through collaboration with the FAIR team that recently released [FAISS](https://github.com/facebookresearch/faiss), an open source library for efficient similarity search and clustering of high-dimensional vectors. The FAIR fastText team published “[FastText.zip: Compressing Text Classification Models](https://arxiv.org/pdf/1612.03651.pdf),” which describes the combination of the two research projects that enabled the reduction to overcome the challenges to shipping models on small memory devices.
19
-
20
- ### Simple yet state-of-the-art text classifier
21
-
22
- fastText is designed to be simple to use for developers, domain experts, and students. Its speed allows you to iterate quickly and refine your models without specialized hardware. fastText models can be trained on more than a billion words on any multicore CPU in less than few minutes and can classify half a million sentences with hundreds of thousands of classes in less than a minute.
23
-
24
- fastText classification compares favorably with more complex neural network architectures implemented for specialized GPU hardware. The performance comparisons were reported in another paper authored by the fastText team, “[Bag of Tricks for Efficient Text Classification](https://arxiv.org/pdf/1607.01759.pdf).” Little or no accuracy is lost with fastText compared with more complex neural network models. For example, fastText performed competitively on sentiment analysis problems when compared to the results of convolutional neural networks (Zhang et al. 2015).
25
-
26
- ![fastText performance](../../../../img/blog/2017-05-02-blog-post-img1.jpg)
27
-
28
- ## How Facebook AI Research engineered fastText’s performance
29
-
30
- Throughout the history of machine learning, research developments have often outpaced hardware performance, and researchers have worked to optimize for practical applications by maximizing accuracy while minimizing computational complexity. Facebook's research teams have developed unique expertise in fitting the best possible model to the hardware available. With fastText, however, one additional constraint is extending machine learning capabilities to everyone who has a multicore CPU computer with a C++ compiler — pretty much everyone developing software or in an engineering role.
31
-
32
- Given this expertise, we were able to build a simple yet powerful library to solve important text classification problems tailored for generic, less powerful hardware. fastText is both impactful as a library for learning text classification and for adding accurate text classification features to applications. fastText also enables developers to add text classification features such as ranking comments with hashtags and ranking reviews based on sentiment analysis without a formal machine learning education.
33
-
34
- Low dimensional vectors were used to improve performance. Large vectors improve accuracy because of the larger number of features in the word vector but are computationally expensive at training time. State of the art performance is possible with low dimensional vectors if the right features are present and the models can scale to a very large corpus. During encoding, vector size is reduced by presenting examples of low dimensional vectors obtained through conventional optimization methods.
35
-
36
- Training time is reduced using a hierarchical softmax based on the Huffman coding tree (a binary tree variant). In operation, search times for the most likely class is also reduced, because each leaf of the tree representing a word vector has an associated probability. Leaves on lower branches have associated descending probabilities. Calculating the probability over the path rapidly narrows to the most likely path as lower probability branches are discarded.
37
-
38
- fastText uses a bag-of-words model to extract features and a linear classifier to train the model. Because the bag-of-words model does not recognize sentence word order, the generalized contextual features of high-frequency words are not shared with low-frequency words, resulting in lower accuracy proportional to lower word frequency. Replacing the bag-of-words model with an n-gram model that recognizes word order would share features of high-frequency word vectors with lower frequency word vectors, but would add complexity, training time, and computational expense. Using fastText, partial n-gram information can be applied during training time as a training setting by selecting the number of words before and after the subject word in a sentence to balance training time and accuracy.
39
-
40
- fastText can achieve better performance than the popular word2vec tool, or other state-of-the-art morphological word representations, and includes many more languages. fastText will receive future improvements from the FAIR team and fastText community making it more accessible.
41
-
42
- Even though the accuracy is comparable, fastText is much faster. When compared with state-of-the-art neural network based models, fastText is 1,000 to 10,000 times faster. This is the result of the simplicity of its implementation that uses low-rank linear models and standard features like bigrams.
43
-
44
- The table below is an indication of fastText’s performance.
45
-
46
- ![fastText performance](../../../../img/blog/2017-05-02-blog-post-img2.jpg)
47
-
48
-
49
- ## Self-paced fastText tutorials
50
-
51
- One of the tutorials accompanying this release explains supervised text classification. By stepping through the tutorial, the developer gains experience building a simple text classifier on a custom dataset. Then the tutorial explains how to tune the model to attain the best possible performance.
52
-
53
- fastText is designed to be extremely fast. This guarantees the responsiveness that developers need to quickly iterate over different settings that affect accuracy. For example, n-grams improve the accuracy of applications like sentiment analysis where word order is important. Hierarchical softmax is shown to increase the speed of applications like hashtag predication where the output space is large.
54
-
55
- In the second tutorial, fastText is used to learn word representations from Wikipedia pages. The tutorial steps through simple ways to test the quality of a model. Queries return a word’s nearest neighbors or given a related pair example, analogies produce the most closely related words to a a queried word. For example, a model can predict that Paris is related to France in the same way as Berlin to Germany. Even words that the model has not been trained on can be tested! fastText looks at groups of characters that build-up the word to produce its representation to find likely candidates for misspelled words and made-up words like ”shiftgear.”
56
-
57
- Students and developers interested in machine learning can get right to work with the newly released self-paced tutorials [available on our website](https://fasttext.cc/docs/en/supervised-tutorial.html). The tutorials are straightforward and do not require advanced knowledge in machine learning. The tutorials also offer insights into other features of the fastText library for more advanced developers.
58
-
59
- Use cases include experimentation, prototyping, and production. fastText can be used as a command line, linked to a C++ application, or used as a library. Community contributed Python and Lua APIs are also available.
60
-
@@ -1,90 +0,0 @@
1
- ---
2
- title: Language identification
3
- author: Edouard Grave
4
- authorURL: https://research.fb.com/people/grave-edouard/
5
- authorFBID: 534178442
6
- ---
7
-
8
- ## Fast and accurate language identification using fastText
9
-
10
- We are excited to announce that we are publishing a fast and accurate tool for text-based language identification. It can recognize more than 170 languages, takes less than 1MB of memory and can classify thousands of documents per second. It is based on fastText library and is released [here](https://fasttext.cc/docs/en/language-identification.html) as open source, free to use by everyone. We are releasing several versions of the model, each optimized for different memory usage, and compared them to the popular tool [langid.py](https://github.com/saffsd/langid.py).
11
-
12
- <!--truncate-->
13
-
14
- ![Evaluation of our models](../../../../img/blog/2017-10-02-blog-post-img1.png)
15
-
16
- Our tool uses various features offered by the fastText library, such as subwords or model compression. In the remainder of this blogpost, we will explain how these work, and how to use them to build a fast and small language detector.
17
-
18
-
19
- ## Training your own language detector
20
-
21
- Building a fast and small language detector with fastText can be done with a few command lines, as we will show below. First, we need a dataset to train our model. Here, we propose to use sentences from the Tatoeba website, which can be downloaded from https://tatoeba.org/eng/downloads. Note that for the sake of simplicity, we use a small quantity of data for this blogpost . If you want to train a state-of-the-art model comparable with our pre-trained model, you will need to use a larger quantity of data.
22
-
23
- ### Training data
24
-
25
- First, let's download the training data:
26
-
27
- ```bash
28
- >> wget http://downloads.tatoeba.org/exports/sentences.tar.bz2
29
- >> bunzip2 sentences.tar.bz2
30
- >> tar xvf sentences.tar
31
- ```
32
- Then, we need to put our training data into fastText format, which is easily done using:
33
- ```bash
34
- >> awk -F"\t" '{print"__label__"$2" "$3}' < sentences.csv | shuf > all.txt
35
- ```
36
- We can then split our training data into training and validation sets:
37
- ```bash
38
- >> head -n 10000 all.txt > valid.txt
39
- >> tail -n +10001 all.txt > train.txt
40
- ```
41
- ### First model
42
- We can now train our first model
43
- ```bash
44
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16
45
- ```
46
- and test it on the held out data:
47
- ```bash
48
- >> ./fasttext test langdetect.bin valid.txt
49
- ```
50
- This model should have an accuracy around 96.5%. Let's see if we can do better, by changing the default parameters.
51
-
52
- ### Using subword features
53
-
54
- The first way to improve our baseline model is to use subword features, which enhance the classifier by taking into account the structure of words. It uses a simple, yet effective way of incorporating such information: each word is represented by the set of all character ngrams of a given length appearing in that word. As an example, when using subwords of length 3, the word skiing is represented by
55
-
56
- { skiing, ski, kii, iin, ing }
57
-
58
- A key advantage of these features is that out-of-vocabulary words, such as misspelled words, can still be represented at test time by their subwords representations. This make text classifiers much more robust, especially for problems with small training sets, or for morphologically rich languages. Users can enable these features by simply specifying the value of the minimum and maximum character ngram size with the command line options -minn and -maxn:
59
- ```bash
60
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4
61
- ```
62
- In that case, fastText now uses all the character ngrams of length 2, 3 and 4. The accuracy of the classifier should improve, and be above 98.5%. We can also make the training and testing faster, by using the hierarchical softmax:
63
- ```bash
64
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4 -loss hs
65
- ```
66
- ### Model compression
67
-
68
- Finally, we can make the size of the model file much smaller, by using model compression:
69
- ```bash
70
- >> ./fasttext quantize -input train.txt -output langdetect -qnorm -cutoff 50000 -retrain
71
- ```
72
- After running this command line, you should get a new model, langdetect.ftz, with a file size smaller than 1MB (instead of 350MB for the original model).
73
-
74
- How does model quantization work? It is quite simple, and relies on two operations: weight quantization and feature selection. We now briefly describe these two operations in detail.
75
-
76
- **Weight quantization.** The first operation is to compress the weights of the models using a technique called vector quantization. Quantization is the process of mapping values from a large set (e.g. floating point numbers) to a smaller set (e.g. bytes). Here, we use a variant which is well suited to compress vectors, instead of scalar values. The algorithm, called product quantization, works as follow. First, each vector is split into smaller vectors, for example of dimension 2. Then, we run the k-means algorithm on these sub-vectors, and represent each sub-vector by the closest centroid obtained with k-means. Therefore, each 2-dimension vector is now represented by 1 byte (to store the centroid), instead of 8 bytes (to store the 2 floats), therefore achieving a compression rate of 8. If we instead split the vectors into sub-vectors of dimension 4, we can achieve a compression rate of 16 (but often with a higher distortion rate). This tradeoff between compression and distortion can be controlled using the -dsub command line option, which set the dimension of the sub-vectors.
77
-
78
- **Feature selection.** The second operation we apply to compress models is to remove features which do not have a big influence on the decision of the classifier. For this, our goal is to find the model with a given number of feature (e.g. 50,000 in the previous example) which is the closest from the original model. The solution of this problem is to keep the features (either words, subwords, or ngrams), which have the vectors with the largest norms.
79
-
80
- ### References
81
-
82
- * [Quantization](https://en.wikipedia.org/wiki/Quantization_%28signal_processing%29)
83
- * [Vector quantization](https://en.wikipedia.org/wiki/Vector_quantization)
84
- * [k-means algorithm](https://en.wikipedia.org/wiki/K-means_clustering)
85
- * [Feature selection](https://en.wikipedia.org/wiki/Feature_selection)
86
-
87
- ### ISO codes of languages supported
88
- ```
89
- af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh
90
- ```
@@ -1,168 +0,0 @@
1
- ---
2
- title: New release of python module
3
- author: Onur Çelebi
4
- authorURL: https://research.fb.com/people/celebi-onur/
5
- authorFBID: 663146146
6
- ---
7
-
8
- Today, we are happy to release a new version of the fastText python library. The main goal of this release is to merge two existing python modules: the official `fastText` module which was available on our github repository and the unofficial `fasttext` module which was available on pypi.org. We hope that this new version will address the confusion due to the previous existence of two similar, but different, python modules.
9
-
10
- The new version of our library is now available on [pypi.org](https://pypi.org/project/fasttext/) as well as on our github repository, and you can find [an overview of its API here](/docs/en/python-module.html).
11
-
12
-
13
-
14
- fastText vs fasttext: what happened?
15
- ----------------------------------
16
- There was an ongoing confusion among our user community about the existence of both `fastText` and `fasttext` modules.
17
-
18
- When fastText was first released in 2016, it was a command line only utility. Very soon, people wanted to use fastText's capabilities from python without having to call a binary for each action. In August 2016, [Bayu Aldi Yansyah](https://github.com/pyk), a developer outside of Facebook, published a python wrapper of fastText. His work was very helpful to a lot of people in our community and he published his unofficial python library on pypi with the pretty straighforward module name `fasttext` (note the lowercase `t`).
19
-
20
- Later, our team began to work on an official python binding of fastText, that was published under the same github repository as the C++ source code. However, the module name for this official library was `fastText` (note the uppercase `T`).
21
-
22
- Last year, Bayu Aldi Yansyah gave us admin access to the pypi project so that we could merge the two libraries.
23
-
24
- To sum up, we ended up with two libraries that had:
25
-
26
- - almost the same name
27
- - different APIs
28
- - different versions
29
- - different ways to install
30
-
31
- That was a very confusing situation for the community.
32
-
33
- What actions did we take?
34
- --------------------------
35
- Today we are merging the two python libraries. We decided to keep the official API and top level functions such as `train_unsupervised` and `train_supervised` as well as returning numpy objects. We remove `cbow`, `skipgram` and `supervised` functions from the unofficial API. However, [we bring nice ideas](#wordvectormodel-and-supervisedmodel-objects) from the unofficial API to the official one. In particular, we liked the pythonic approach of `WordVectorModel`. This new python module is named `fasttext`, and is available on both [pypi](https://pypi.org/project/fasttext/) and our [github](https://github.com/facebookresearch/fastText) repository.
36
-
37
- From now, we will refer to the tool as "fastText", however the name of the python module is `fasttext`.
38
-
39
-
40
-
41
- What is the right way to do now?
42
- --------------------------------
43
-
44
- Before, you would either use `fastText` (uppercase `T`):
45
- ```python
46
- import fastText
47
- # and call:
48
- fastText.train_supervised
49
- fastText.train_unsupervised
50
- ```
51
-
52
- or use `fasttext` (lowercase `t`):
53
- ```python
54
- import fasttext
55
- # and call:
56
- fasttext.cbow
57
- fasttext.skipgram
58
- fasttext.supervised
59
- ```
60
-
61
-
62
- Now, the right way to do is to
63
- `import fasttext` (lowercase `t`)
64
- and use
65
- ```python
66
- import fasttext
67
- # and call:
68
- fasttext.train_supervised
69
- fasttext.train_unsupervised
70
- ```
71
-
72
- We are keeping the lowercase `fasttext` module name, while we keep the `fastText` API.
73
-
74
- This is because:
75
-
76
- - the standard way to name python modules is all lowercases
77
- - the API from `fastText` is exposing numpy arrays, which is widely used by the machine learning community.
78
-
79
-
80
- You can find a more comprehensive overview of our python API [here](/docs/en/python-module.html).
81
-
82
- Should I modify my existing code?
83
- ---------------------------------
84
- Depending on the version of the python module you were using, you might need to do some little modifications on your existing code.
85
-
86
- ### 1) You were using the official `fastText` module:
87
-
88
- You don't have to do much. Just replace your `import fastText` lines by `import fasttext` and everything should work as usual.
89
-
90
- ### 2) You were using the unofficial `fasttext` module:
91
-
92
- If you were using the functions `cbow`, `skipgram`, `supervised` and/or `WordVectorModel`, `SupervisedModel` objects, you were using the unofficial `fasttext` module.
93
-
94
- Updating your code should be pretty straightforward, but it still implies some little changes.
95
-
96
- #### `cbow` function: use `train_unsupervised` instead.
97
- For example, replace:
98
-
99
- ```
100
- fasttext.cbow("train.txt", "model_file", lr=0.05, dim=100, ws=5, epoch=5)
101
- ```
102
- with
103
- ```
104
- model = fasttext.train_unsupervised("train.txt", model='cbow', lr=0.05, dim=100, ws=5, epoch=5)
105
- model.save_model("model_file.bin")
106
- ```
107
-
108
- #### `skipgram` function: use `train_unsupervised` instead.
109
- For example, replace:
110
-
111
- ```
112
- fasttext.skipgram("train.txt", "model_file", lr=0.05, dim=100, ws=5, epoch=5)
113
- ```
114
- with
115
- ```
116
- model = fasttext.train_unsupervised("train.txt", model='skipgram', lr=0.05, dim=100, ws=5, epoch=5)
117
- model.save_model("model_file.bin")
118
- ```
119
-
120
-
121
- #### `supervised` function: use `train_supervised` instead
122
- For example, replace:
123
- ```
124
- fasttext.supervised("train.txt", "model_file", lr=0.1, dim=100, epoch=5, word_ngrams=2, loss='softmax')
125
- ```
126
- with
127
- ```
128
- model = fasttext.train_supervised("train.txt", lr=0.1, dim=100, epoch=5, , word_ngrams=2, loss='softmax')
129
- model.save_model("model_file.bin")
130
- ```
131
-
132
- #### Parameters
133
-
134
- - As you can see, you can use either `word_ngrams` or `wordNgrams` as parameter name. Because the parameter names from the unofficial API are mapped to the official ones: `min_count` to `minCount`, `word_ngrams` to `wordNgrams`, `lr_update_rate` to `lrUpdateRate`, `label_prefix` to `label` and `pretrained_vectors` to `pretrainedVectors`.
135
- - `silent` parameter is not supported. Use `verbose` parameter instead.
136
- - `encoding` parameter is not supported, every input should be encoded in `utf-8`.
137
-
138
-
139
- ### `WordVectorModel` and `SupervisedModel` objects
140
-
141
- Instead of `WordVectorModel` and `SupervisedModel` objects, we return a model object that mimics some nice ideas from the unofficial API.
142
-
143
- ```python
144
- model = fasttext.train_unsupervised("train.txt", model='skipgram')
145
- print(model.words) # list of words in dictionary
146
- print(model['king']) # get the vector of the word 'king'
147
- print('king' in model) # check if a word is in dictionary
148
- ```
149
-
150
-
151
-
152
- ```python
153
- model = fasttext.train_supervised("train.txt")
154
- print(model.words) # list of words in dictionary
155
- print(model.labels) # list of labels
156
- ```
157
-
158
- The model object also contains the arguments of the training:
159
-
160
- ```python
161
- print(model.epoch)
162
- print(model.loss)
163
- print(model.wordNgrams)
164
- ```
165
-
166
- Thank you!
167
- ------------
168
- We want to thank our incredible community. We truly appreciate your feedback, a big thank you to everyone reporting issues and contributing to the project. In particular we want to express how grateful we are to [Bayu Aldi Yansyah](https://github.com/pyk) who did a great job with his python library and for giving us the ownership of the pypi `fasttext` project.
@@ -1,127 +0,0 @@
1
- /**
2
- * Copyright (c) 2017-present, Facebook, Inc.
3
- * All rights reserved.
4
- *
5
- * This source code is licensed under the MIT license found in the
6
- * LICENSE file in the root directory of this source tree.
7
- */
8
-
9
- const React = require("react");
10
-
11
- const githubButton = (
12
- <a
13
- className="github-button"
14
- href="https://github.com/facebookresearch/fastText/"
15
- data-icon="octicon-star"
16
- data-count-href="/fastText/stargazers"
17
- data-count-api="/repos/fastText#stargazers_count"
18
- data-count-aria-label="# stargazers on GitHub"
19
- aria-label="Star this project on GitHub"
20
- >
21
- Star
22
- </a>
23
- );
24
-
25
- class Footer extends React.Component {
26
- render() {
27
- const language = this.props.language || "en";
28
- const currentYear = new Date().getFullYear();
29
- return (
30
- <footer className="nav-footer" id="footer">
31
- <section className="sitemap">
32
- <a href={this.props.config.baseUrl} className="nav-home">
33
- <img
34
- src={this.props.config.baseUrl + this.props.config.footerIcon}
35
- alt={this.props.config.title}
36
- />
37
- </a>
38
- <div>
39
- <h5>Support</h5>
40
- <a
41
- href={
42
- this.props.config.baseUrl + "docs/" + language + "/support.html"
43
- }
44
- >
45
- Getting Started
46
- </a>
47
- <a
48
- href={
49
- this.props.config.baseUrl +
50
- "docs/" +
51
- language +
52
- "/supervised-tutorial.html"
53
- }
54
- >
55
- Tutorials
56
- </a>
57
- <a
58
- href={
59
- this.props.config.baseUrl +
60
- "docs/" +
61
- language +
62
- "/faqs.html"
63
- }
64
- >
65
- FAQs
66
- </a>
67
- <a
68
- href={
69
- this.props.config.baseUrl +
70
- "docs/" +
71
- language +
72
- "/api.html"
73
- }
74
- >
75
- API
76
- </a>
77
- </div>
78
- <div>
79
- <h5>Community</h5>
80
- <a
81
- href="https://www.facebook.com/groups/1174547215919768/"
82
- target="_blank"
83
- >
84
- Facebook Group
85
- </a>
86
- <a
87
- href="http://stackoverflow.com/questions/tagged/fasttext"
88
- target="_blank"
89
- >
90
- Stack Overflow
91
- </a>
92
- <a
93
- href="https://groups.google.com/forum/#!forum/fasttext-library"
94
- target="_blank"
95
- >
96
- Google Group
97
- </a>
98
- </div>
99
- <div>
100
- <h5>More</h5>
101
- <a href={this.props.config.baseUrl + "blog"}>Blog</a>
102
- <a href="https://github.com/facebookresearch/fastText" target="_blank">GitHub</a>
103
- {githubButton}
104
- </div>
105
- </section>
106
-
107
- <a
108
- href="https://code.facebook.com/projects/"
109
- target="_blank"
110
- className="fbOpenSource"
111
- >
112
- <img
113
- src={this.props.config.baseUrl + "img/oss_logo.png"}
114
- alt="Facebook Open Source"
115
- width="170"
116
- height="45"
117
- />
118
- </a>
119
- <section className="copyright">
120
- Copyright &copy; {currentYear} Facebook Inc.
121
- </section>
122
- </footer>
123
- );
124
- }
125
- }
126
-
127
- module.exports = Footer;