fasttext 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (478) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +20 -1
  4. data/lib/fasttext.rb +3 -0
  5. data/lib/fasttext/classifier.rb +12 -4
  6. data/lib/fasttext/vectorizer.rb +1 -1
  7. data/lib/fasttext/version.rb +1 -1
  8. metadata +4 -473
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/vendor/fastText/CMakeLists.txt +0 -68
  11. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  12. data/vendor/fastText/CONTRIBUTING.md +0 -32
  13. data/vendor/fastText/MANIFEST.in +0 -5
  14. data/vendor/fastText/Makefile +0 -63
  15. data/vendor/fastText/alignment/README.md +0 -53
  16. data/vendor/fastText/alignment/align.py +0 -145
  17. data/vendor/fastText/alignment/eval.py +0 -60
  18. data/vendor/fastText/alignment/example.sh +0 -51
  19. data/vendor/fastText/alignment/unsup_align.py +0 -109
  20. data/vendor/fastText/alignment/utils.py +0 -154
  21. data/vendor/fastText/classification-example.sh +0 -41
  22. data/vendor/fastText/classification-results.sh +0 -94
  23. data/vendor/fastText/crawl/README.md +0 -26
  24. data/vendor/fastText/crawl/dedup.cc +0 -51
  25. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  26. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  27. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  28. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  29. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  30. data/vendor/fastText/docs/api.md +0 -6
  31. data/vendor/fastText/docs/cheatsheet.md +0 -66
  32. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  33. data/vendor/fastText/docs/dataset.md +0 -6
  34. data/vendor/fastText/docs/english-vectors.md +0 -53
  35. data/vendor/fastText/docs/faqs.md +0 -63
  36. data/vendor/fastText/docs/language-identification.md +0 -47
  37. data/vendor/fastText/docs/options.md +0 -50
  38. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  39. data/vendor/fastText/docs/python-module.md +0 -314
  40. data/vendor/fastText/docs/references.md +0 -41
  41. data/vendor/fastText/docs/supervised-models.md +0 -54
  42. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  43. data/vendor/fastText/docs/support.md +0 -58
  44. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  45. data/vendor/fastText/eval.py +0 -95
  46. data/vendor/fastText/get-wikimedia.sh +0 -79
  47. data/vendor/fastText/python/README.md +0 -322
  48. data/vendor/fastText/python/README.rst +0 -406
  49. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  50. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  51. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  52. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  53. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  54. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  55. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  56. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  57. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  58. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  59. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  60. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  61. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  62. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  63. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  64. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  65. data/vendor/fastText/quantization-example.sh +0 -40
  66. data/vendor/fastText/runtests.py +0 -60
  67. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  68. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  69. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  70. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  71. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  72. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  73. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  74. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  75. data/vendor/fastText/setup.cfg +0 -2
  76. data/vendor/fastText/setup.py +0 -203
  77. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  78. data/vendor/fastText/website/README.md +0 -6
  79. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  80. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  81. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  82. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  83. data/vendor/fastText/website/core/Footer.js +0 -127
  84. data/vendor/fastText/website/package.json +0 -12
  85. data/vendor/fastText/website/pages/en/index.js +0 -286
  86. data/vendor/fastText/website/sidebars.json +0 -18
  87. data/vendor/fastText/website/siteConfig.js +0 -102
  88. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  89. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  90. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  91. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  92. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  93. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  94. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  95. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  96. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  97. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  98. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  99. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  100. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  101. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  102. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  103. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  104. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  105. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  106. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  107. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  108. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  109. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  110. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  111. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  112. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  113. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  114. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  115. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  116. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  121. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  122. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  123. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  124. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  125. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  126. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  127. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  128. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  129. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  130. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  131. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  132. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  133. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  134. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  135. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  136. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  137. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  138. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  139. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  140. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  141. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  142. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  143. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  144. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  145. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  146. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  147. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  148. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  149. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  150. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  151. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  152. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  153. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  154. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  155. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  156. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  157. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  158. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  159. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  160. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  161. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  162. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  163. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  164. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  165. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  166. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  167. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  168. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  169. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  170. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  171. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  172. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  173. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  174. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  175. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  176. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  177. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  178. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  179. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  180. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  181. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  182. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  183. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  184. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  185. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  186. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  187. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  188. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  189. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  190. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  191. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  192. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  193. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  194. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  195. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  196. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  197. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  198. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  199. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  200. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  201. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  202. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  203. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  204. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  205. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  206. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  207. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  208. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  209. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  210. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  211. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  212. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  213. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  214. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  215. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  216. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  217. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  218. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  219. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  220. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  221. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  222. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  223. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  224. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  225. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  226. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  227. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  228. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  229. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  230. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  232. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  233. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  237. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  241. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  243. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  245. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  247. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  249. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  251. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  253. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  255. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  257. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  259. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  261. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  263. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  265. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  267. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  269. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  271. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  273. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  275. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  277. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  279. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  281. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  282. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  283. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  284. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  285. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  286. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  287. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  288. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  289. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  290. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  291. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  292. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  293. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  294. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  295. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  296. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  297. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  298. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  299. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  300. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  302. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  304. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  308. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  310. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  312. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  314. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  316. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  318. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  322. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  324. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  326. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  332. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  338. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  342. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  372. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  374. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  375. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  376. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  377. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  378. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  382. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  384. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  386. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  388. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  390. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  392. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  393. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  394. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  395. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  396. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  397. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  398. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  399. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  400. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  401. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  402. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  404. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  426. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  427. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  428. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  429. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  430. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  431. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  432. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  433. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  434. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  435. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  436. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  437. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  438. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  439. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  440. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  441. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  442. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  443. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  444. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  445. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  446. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  447. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  448. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  449. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  450. data/vendor/fastText/website/static/fasttext.css +0 -48
  451. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  452. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  453. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  454. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  455. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  456. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  457. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  458. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  459. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  460. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  461. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  462. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  463. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  464. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  465. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  466. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  467. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  468. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  469. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  470. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  471. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  472. data/vendor/fastText/website/static/img/model-black.png +0 -0
  473. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  474. data/vendor/fastText/website/static/img/model-red.png +0 -0
  475. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  476. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  477. data/vendor/fastText/wikifil.pl +0 -57
  478. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,6 +0,0 @@
1
- Prerequisites
2
- - nodejs
3
-
4
- To build locally, navigate into subfolder website and execute
5
- - npm install
6
- - npm run start
@@ -1,42 +0,0 @@
1
- ---
2
- title: Releasing fastText
3
- author: Edouard Grave
4
- authorURL: https://research.fb.com/people/grave-edouard/
5
- authorFBID: 534178442
6
- ---
7
-
8
- ## Faster, better text classification!
9
-
10
- Understanding the meaning of words that roll off your tongue as you talk, or your fingertips as you tap out posts is one of the biggest technical challenges facing artificial intelligence researchers. But it is an essential need. Automatic text processing forms a key part of the day-to-day interaction with your computer; it’s a critical component of everything from web search and content ranking to spam filtering, and when it works well, it’s completely invisible to you. With the growing amount of online data, there is a need for more flexible tools to better understand the content of very large datasets, in order to provide more accurate classification results.
11
-
12
- To address this need, the [Facebook AI Research (FAIR) lab](https://research.fb.com/category/facebook-ai-research-fair/) is open-sourcing [fastText](https://github.com/facebookresearch/fastText), a library designed to help build scalable solutions for text representation and classification. Our ongoing commitment to collaboration and sharing with the community extends beyond just delivering code. We know it’s important to share our learnings to advance the field, so have also [published](http://arxiv.org/abs/1607.04606) [our research](http://arxiv.org/abs/1607.01759) relating to fastText.
13
-
14
- FastText combines some of the most successful concepts introduced by the natural language processing and machine learning communities in the last few decades. These include representing sentences with bag of words and bag of n-grams, as well as using subword information, and sharing information across classes through a hidden representation. We also employ a hierarchical softmax that takes advantage of the unbalanced distribution of the classes to speed up computation. These different concepts are being used for two different tasks: efficient text classification and learning word vector representations.
15
-
16
- <!--truncate-->
17
-
18
- ## Efficient learning for text classification
19
-
20
- Deep neural networks have recently become very popular for text processing. While these models achieve very good performance in limited laboratory practice, they can be slow to train and test, which limits their use on very large datasets.
21
-
22
- FastText helps solve this problem. To be efficient on datasets with a very large number of categories, it uses a hierarchical classifier instead of a flat structure, in which the different categories are organized in a tree (think binary tree instead of list). This reduces the time complexities of training and testing text classifiers from linear to logarithmic with respect to the number of classes. FastText also exploits the fact that classes are imbalanced (some classes appearing more often than others) by using the Huffman algorithm to build the tree used to represent categories. The depth in the tree of very frequent categories is therefore smaller than for infrequent ones, leading to further computational efficiency.
23
-
24
- FastText also represents a text by a low dimensional vector, which is obtained by summing vectors corresponding to the words appearing in the text. In fastText, a low dimensional vector is associated to each word of the vocabulary. This hidden representation is shared across all classifiers for different categories, allowing information about words learned for one category to be used by other categories. These kinds of representations, called bag of words, ignore word order. In fastText we also use vectors to represent word ngrams to take into account local word order, which is important for many text classification problems.
25
-
26
- Our experiments show that fastText is often on par with deep learning classifiers in terms of accuracy, and many orders of magnitude faster for training and evaluation. With fastText, we were often able to cut training times from several days to just a few seconds, and achieve state-of-the-art performance on many standard problems, such as sentiment analysis or tag prediction.
27
-
28
- ![fastText performance](../../../../img/blog/2016-08-18-blog-post-img1.png)
29
- _Comparison between fastText and deep learning-based methods._
30
-
31
- ## A dedicated tool
32
-
33
- Text classification is very important in the commercial world; spam or clickbait filtering being perhaps the most ubiquitous example. There are tools that design models for general classification problems (such as Vowpal Wabbit or libSVM), but fastText is exclusively dedicated to text classification. This allows it to be quickly trained on extremely large datasets. We have seen results of models trained on more than 1 billion words in less than 10 minutes using a standard multicore CPU. FastText can also classify a half-million sentences among more than 300,000 categories in less than five minutes.
34
-
35
- ## Works on many languages
36
-
37
- Besides text classification, fastText can also be used to learn vector representations of words. It has been designed to work on a variety of languages, including English, German, Spanish, French, and Czech, by taking advantage of the languages' morphological structure. It uses a simple yet effective way of incorporating subword information that turns out to work very well for morphologically rich languages like Czech, demonstrating that carefully designed character ngram features are a strong source of information to enrich the word representations. FastText can achieve significantly better performance than the popular [word2vec](https://code.google.com/archive/p/word2vec/) tool, or other state-of-the-art morphological word representations.
38
-
39
- ![fastText performance](../../../../img/blog/2016-08-18-blog-post-img2.png)
40
- _Comparison between fastText and state-of-the-art word representations for different languages._
41
-
42
- We hope the introduction of fastText helps the community build better, more scalable solutions for text representation and classification. Delivered as an open-source library, we believe fastText is a valuable addition to the research and engineering communities, which will ultimately help us all design better applications and further advances in language understanding.
@@ -1,60 +0,0 @@
1
- ---
2
- title: fastText on mobile
3
- author: Armand Joulin
4
- authorURL: https://research.fb.com/people/joulin-armand/
5
- authorFBID: 696297201
6
- ---
7
-
8
- Today, the Facebook AI Research (FAIR) team released pre-trained vectors in 294 languages, accompanied by two quick-start tutorials, to increase fastText’s accessibility to the large community of students, software developers, and researchers interested in machine learning. fastText’s models now fit on smartphones and small computers like Raspberry Pi devices thanks to a new functionality that reduces memory usage.
9
-
10
- First open-sourced last summer, [fastText](https://github.com/facebookresearch/fastText) was designed to be accessible to anyone with generic hardware like notebooks and X86 cloud instances, or almost any platform with enough memory. Smartphone and small computer support extend fastText’s accessibility to an even larger community and a greater range of applications.
11
-
12
- <!--truncate-->
13
-
14
- ### fastText on small memory devices
15
-
16
- To reach more people and more applications via mobile phones and other internet-connected devices, this release contains a new functionality that reduces the memory consumed by fastText models. The typical model built on earlier versions uses a few gigabytes of memory; this new feature helps to reduce memory to as little as a few hundred kilobytes.
17
-
18
- Squeezing models into reduced memory footprints was made possible through collaboration with the FAIR team that recently released [FAISS](https://github.com/facebookresearch/faiss), an open source library for efficient similarity search and clustering of high-dimensional vectors. The FAIR fastText team published “[FastText.zip: Compressing Text Classification Models](https://arxiv.org/pdf/1612.03651.pdf),” which describes the combination of the two research projects that enabled the reduction to overcome the challenges to shipping models on small memory devices.
19
-
20
- ### Simple yet state-of-the-art text classifier
21
-
22
- fastText is designed to be simple to use for developers, domain experts, and students. Its speed allows you to iterate quickly and refine your models without specialized hardware. fastText models can be trained on more than a billion words on any multicore CPU in less than a few minutes and can classify half a million sentences with hundreds of thousands of classes in less than a minute.
23
-
24
- fastText classification compares favorably with more complex neural network architectures implemented for specialized GPU hardware. The performance comparisons were reported in another paper authored by the fastText team, “[Bag of Tricks for Efficient Text Classification](https://arxiv.org/pdf/1607.01759.pdf).” Little or no accuracy is lost with fastText compared with more complex neural network models. For example, fastText performed competitively on sentiment analysis problems when compared to the results of convolutional neural networks (Zhang et al. 2015).
25
-
26
- ![fastText performance](../../../../img/blog/2017-05-02-blog-post-img1.jpg)
27
-
28
- ## How Facebook AI Research engineered fastText’s performance
29
-
30
- Throughout the history of machine learning, research developments have often outpaced hardware performance, and researchers have worked to optimize for practical applications by maximizing accuracy while minimizing computational complexity. Facebook's research teams have developed unique expertise in fitting the best possible model to the hardware available. With fastText, however, one additional constraint is extending machine learning capabilities to everyone who has a multicore CPU computer with a C++ compiler — pretty much everyone developing software or in an engineering role.
31
-
32
- Given this expertise, we were able to build a simple yet powerful library to solve important text classification problems tailored for generic, less powerful hardware. fastText is both impactful as a library for learning text classification and for adding accurate text classification features to applications. fastText also enables developers to add text classification features such as ranking comments with hashtags and ranking reviews based on sentiment analysis without a formal machine learning education.
33
-
34
- Low dimensional vectors were used to improve performance. Large vectors improve accuracy because of the larger number of features in the word vector but are computationally expensive at training time. State of the art performance is possible with low dimensional vectors if the right features are present and the models can scale to a very large corpus. During encoding, vector size is reduced by presenting examples of low dimensional vectors obtained through conventional optimization methods.
35
-
36
- Training time is reduced using a hierarchical softmax based on the Huffman coding tree (a binary tree variant). In operation, search time for the most likely class is also reduced, because each leaf of the tree representing a word vector has an associated probability. Leaves on lower branches have associated descending probabilities. Calculating the probability over the path rapidly narrows to the most likely path as lower probability branches are discarded.
37
-
38
- fastText uses a bag-of-words model to extract features and a linear classifier to train the model. Because the bag-of-words model does not recognize sentence word order, the generalized contextual features of high-frequency words are not shared with low-frequency words, resulting in lower accuracy proportional to lower word frequency. Replacing the bag-of-words model with an n-gram model that recognizes word order would share features of high-frequency word vectors with lower frequency word vectors, but would add complexity, training time, and computational expense. Using fastText, partial n-gram information can be applied during training time as a training setting by selecting the number of words before and after the subject word in a sentence to balance training time and accuracy.
39
-
40
- fastText can achieve better performance than the popular word2vec tool, or other state-of-the-art morphological word representations, and includes many more languages. fastText will receive future improvements from the FAIR team and fastText community making it more accessible.
41
-
42
- Even though the accuracy is comparable, fastText is much faster. When compared with state-of-the-art neural network based models, fastText is 1,000 to 10,000 times faster. This is the result of the simplicity of its implementation that uses low-rank linear models and standard features like bigrams.
43
-
44
- The table below is an indication of fastText’s performance.
45
-
46
- ![fastText performance](../../../../img/blog/2017-05-02-blog-post-img2.jpg)
47
-
48
-
49
- ## Self-paced fastText tutorials
50
-
51
- One of the tutorials accompanying this release explains supervised text classification. By stepping through the tutorial, the developer gains experience building a simple text classifier on a custom dataset. Then the tutorial explains how to tune the model to attain the best possible performance.
52
-
53
- fastText is designed to be extremely fast. This guarantees the responsiveness that developers need to quickly iterate over different settings that affect accuracy. For example, n-grams improve the accuracy of applications like sentiment analysis where word order is important. Hierarchical softmax is shown to increase the speed of applications like hashtag prediction where the output space is large.
54
-
55
- In the second tutorial, fastText is used to learn word representations from Wikipedia pages. The tutorial steps through simple ways to test the quality of a model. Queries return a word’s nearest neighbors or, given a related pair example, analogies produce the most closely related words to a queried word. For example, a model can predict that Paris is related to France in the same way as Berlin to Germany. Even words that the model has not been trained on can be tested! fastText looks at groups of characters that build up the word to produce its representation to find likely candidates for misspelled words and made-up words like “shiftgear.”
56
-
57
- Students and developers interested in machine learning can get right to work with the newly released self-paced tutorials [available on our website](https://fasttext.cc/docs/en/supervised-tutorial.html). The tutorials are straightforward and do not require advanced knowledge in machine learning. The tutorials also offer insights into other features of the fastText library for more advanced developers.
58
-
59
- Use cases include experimentation, prototyping, and production. fastText can be used as a command line, linked to a C++ application, or used as a library. Community contributed Python and Lua APIs are also available.
60
-
@@ -1,90 +0,0 @@
1
- ---
2
- title: Language identification
3
- author: Edouard Grave
4
- authorURL: https://research.fb.com/people/grave-edouard/
5
- authorFBID: 534178442
6
- ---
7
-
8
- ## Fast and accurate language identification using fastText
9
-
10
- We are excited to announce that we are publishing a fast and accurate tool for text-based language identification. It can recognize more than 170 languages, takes less than 1MB of memory and can classify thousands of documents per second. It is based on the fastText library and is released [here](https://fasttext.cc/docs/en/language-identification.html) as open source, free to use by everyone. We are releasing several versions of the model, each optimized for different memory usage, and have compared them to the popular tool [langid.py](https://github.com/saffsd/langid.py).
11
-
12
- <!--truncate-->
13
-
14
- ![Evaluation of our models](../../../../img/blog/2017-10-02-blog-post-img1.png)
15
-
16
- Our tool uses various features offered by the fastText library, such as subwords or model compression. In the remainder of this blogpost, we will explain how these work, and how to use them to build a fast and small language detector.
17
-
18
-
19
- ## Training your own language detector
20
-
21
- Building a fast and small language detector with fastText can be done with a few command lines, as we will show below. First, we need a dataset to train our model. Here, we propose to use sentences from the Tatoeba website, which can be downloaded from https://tatoeba.org/eng/downloads. Note that for the sake of simplicity, we use a small quantity of data for this blogpost. If you want to train a state-of-the-art model comparable with our pre-trained model, you will need to use a larger quantity of data.
22
-
23
- ### Training data
24
-
25
- First, let's download the training data:
26
-
27
- ```bash
28
- >> wget http://downloads.tatoeba.org/exports/sentences.tar.bz2
29
- >> bunzip2 sentences.tar.bz2
30
- >> tar xvf sentences.tar
31
- ```
32
- Then, we need to put our training data into fastText format, which is easily done using:
33
- ```bash
34
- >> awk -F"\t" '{print"__label__"$2" "$3}' < sentences.csv | shuf > all.txt
35
- ```
36
- We can then split our training data into training and validation sets:
37
- ```bash
38
- >> head -n 10000 all.txt > valid.txt
39
- >> tail -n +10001 all.txt > train.txt
40
- ```
41
- ### First model
42
- We can now train our first model
43
- ```bash
44
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16
45
- ```
46
- and test it on the held out data:
47
- ```bash
48
- >> ./fasttext test langdetect.bin valid.txt
49
- ```
50
- This model should have an accuracy around 96.5%. Let's see if we can do better, by changing the default parameters.
51
-
52
- ### Using subword features
53
-
54
- The first way to improve our baseline model is to use subword features, which enhance the classifier by taking into account the structure of words. It uses a simple, yet effective way of incorporating such information: each word is represented by the set of all character ngrams of a given length appearing in that word. As an example, when using subwords of length 3, the word skiing is represented by
55
-
56
- { skiing, ski, kii, iin, ing }
57
-
58
- A key advantage of these features is that out-of-vocabulary words, such as misspelled words, can still be represented at test time by their subword representations. This makes text classifiers much more robust, especially for problems with small training sets, or for morphologically rich languages. Users can enable these features by simply specifying the value of the minimum and maximum character ngram size with the command line options -minn and -maxn:
59
- ```bash
60
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4
61
- ```
62
- In that case, fastText now uses all the character ngrams of length 2, 3 and 4. The accuracy of the classifier should improve, and be above 98.5%. We can also make the training and testing faster, by using the hierarchical softmax:
63
- ```bash
64
- >> ./fasttext supervised -input train.txt -output langdetect -dim 16 -minn 2 -maxn 4 -loss hs
65
- ```
66
- ### Model compression
67
-
68
- Finally, we can make the size of the model file much smaller, by using model compression:
69
- ```bash
70
- >> ./fasttext quantize -input train.txt -output langdetect -qnorm -cutoff 50000 -retrain
71
- ```
72
- After running this command line, you should get a new model, langdetect.ftz, with a file size smaller than 1MB (instead of 350MB for the original model).
73
-
74
- How does model quantization work? It is quite simple, and relies on two operations: weight quantization and feature selection. We now briefly describe these two operations in detail.
75
-
76
- **Weight quantization.** The first operation is to compress the weights of the models using a technique called vector quantization. Quantization is the process of mapping values from a large set (e.g. floating point numbers) to a smaller set (e.g. bytes). Here, we use a variant which is well suited to compress vectors, instead of scalar values. The algorithm, called product quantization, works as follows. First, each vector is split into smaller vectors, for example of dimension 2. Then, we run the k-means algorithm on these sub-vectors, and represent each sub-vector by the closest centroid obtained with k-means. Therefore, each 2-dimension vector is now represented by 1 byte (to store the centroid), instead of 8 bytes (to store the 2 floats), therefore achieving a compression rate of 8. If we instead split the vectors into sub-vectors of dimension 4, we can achieve a compression rate of 16 (but often with a higher distortion rate). This tradeoff between compression and distortion can be controlled using the -dsub command line option, which sets the dimension of the sub-vectors.
77
-
78
- **Feature selection.** The second operation we apply to compress models is to remove features which do not have a big influence on the decision of the classifier. For this, our goal is to find the model with a given number of features (e.g. 50,000 in the previous example) which is the closest to the original model. The solution of this problem is to keep the features (either words, subwords, or ngrams), which have the vectors with the largest norms.
79
-
80
- ### References
81
-
82
- * [Quantization](https://en.wikipedia.org/wiki/Quantization_%28signal_processing%29)
83
- * [Vector quantization](https://en.wikipedia.org/wiki/Vector_quantization)
84
- * [k-means algorithm](https://en.wikipedia.org/wiki/K-means_clustering)
85
- * [Feature selection](https://en.wikipedia.org/wiki/Feature_selection)
86
-
87
- ### ISO codes of languages supported
88
- ```
89
- af als am an ar arz as ast av az azb ba bar bcl be bg bh bn bo bpy br bs bxr ca cbk ce ceb ckb co cs cv cy da de diq dsb dty dv el eml en eo es et eu fa fi fr frr fy ga gd gl gn gom gu gv he hi hif hr hsb ht hu hy ia id ie ilo io is it ja jbo jv ka kk km kn ko krc ku kv kw ky la lb lez li lmo lo lrc lt lv mai mg mhr min mk ml mn mr mrj ms mt mwl my myv mzn nah nap nds ne new nl nn no oc or os pa pam pfl pl pms pnb ps pt qu rm ro ru rue sa sah sc scn sco sd sh si sk sl so sq sr su sv sw ta te tg th tk tl tr tt tyv ug uk ur uz vec vep vi vls vo wa war wuu xal xmf yi yo yue zh
90
- ```
@@ -1,168 +0,0 @@
1
- ---
2
- title: New release of python module
3
- author: Onur Çelebi
4
- authorURL: https://research.fb.com/people/celebi-onur/
5
- authorFBID: 663146146
6
- ---
7
-
8
- Today, we are happy to release a new version of the fastText python library. The main goal of this release is to merge two existing python modules: the official `fastText` module which was available on our github repository and the unofficial `fasttext` module which was available on pypi.org. We hope that this new version will address the confusion due to the previous existence of two similar, but different, python modules.
9
-
10
- The new version of our library is now available on [pypi.org](https://pypi.org/project/fasttext/) as well as on our github repository, and you can find [an overview of its API here](/docs/en/python-module.html).
11
-
12
-
13
-
14
- fastText vs fasttext: what happened?
15
- ----------------------------------
16
- There was ongoing confusion among our user community about the existence of both `fastText` and `fasttext` modules.
17
-
18
- When fastText was first released in 2016, it was a command line only utility. Very soon, people wanted to use fastText's capabilities from python without having to call a binary for each action. In August 2016, [Bayu Aldi Yansyah](https://github.com/pyk), a developer outside of Facebook, published a python wrapper of fastText. His work was very helpful to a lot of people in our community and he published his unofficial python library on pypi with the pretty straightforward module name `fasttext` (note the lowercase `t`).
19
-
20
- Later, our team began to work on an official python binding of fastText, that was published under the same github repository as the C++ source code. However, the module name for this official library was `fastText` (note the uppercase `T`).
21
-
22
- Last year, Bayu Aldi Yansyah gave us admin access to the pypi project so that we could merge the two libraries.
23
-
24
- To sum up, we ended up with two libraries that had:
25
-
26
- - almost the same name
27
- - different APIs
28
- - different versions
29
- - different ways to install
30
-
31
- That was a very confusing situation for the community.
32
-
33
- What actions did we take?
34
- --------------------------
35
- Today we are merging the two python libraries. We decided to keep the official API and top level functions such as `train_unsupervised` and `train_supervised` as well as returning numpy objects. We remove `cbow`, `skipgram` and `supervised` functions from the unofficial API. However, [we bring nice ideas](#wordvectormodel-and-supervisedmodel-objects) from the unofficial API to the official one. In particular, we liked the pythonic approach of `WordVectorModel`. This new python module is named `fasttext`, and is available on both [pypi](https://pypi.org/project/fasttext/) and our [github](https://github.com/facebookresearch/fastText) repository.
36
-
37
- From now on, we will refer to the tool as "fastText"; however, the name of the python module is `fasttext`.
38
-
39
-
40
-
41
- What is the right way to do now?
42
- --------------------------------
43
-
44
- Before, you would either use `fastText` (uppercase `T`):
45
- ```python
46
- import fastText
47
- # and call:
48
- fastText.train_supervised
49
- fastText.train_unsupervised
50
- ```
51
-
52
- or use `fasttext` (lowercase `t`):
53
- ```python
54
- import fasttext
55
- # and call:
56
- fasttext.cbow
57
- fasttext.skipgram
58
- fasttext.supervised
59
- ```
60
-
61
-
62
- Now, the right way to do it is to
63
- `import fasttext` (lowercase `t`)
64
- and use
65
- ```python
66
- import fasttext
67
- # and call:
68
- fasttext.train_supervised
69
- fasttext.train_unsupervised
70
- ```
71
-
72
- We are keeping the lowercase `fasttext` module name, while we keep the `fastText` API.
73
-
74
- This is because:
75
-
76
- - the standard way to name python modules is all lowercase
77
- - the API from `fastText` is exposing numpy arrays, which are widely used by the machine learning community.
78
-
79
-
80
- You can find a more comprehensive overview of our python API [here](/docs/en/python-module.html).
81
-
82
- Should I modify my existing code?
83
- ---------------------------------
84
- Depending on the version of the python module you were using, you might need to make some small modifications to your existing code.
85
-
86
- ### 1) You were using the official `fastText` module:
87
-
88
- You don't have to do much. Just replace your `import fastText` lines with `import fasttext` and everything should work as usual.
89
-
90
- ### 2) You were using the unofficial `fasttext` module:
91
-
92
- If you were using the functions `cbow`, `skipgram`, `supervised` and/or `WordVectorModel`, `SupervisedModel` objects, you were using the unofficial `fasttext` module.
93
-
94
- Updating your code should be pretty straightforward, but it still requires some small changes.
95
-
96
- #### `cbow` function: use `train_unsupervised` instead.
97
- For example, replace:
98
-
99
- ```
100
- fasttext.cbow("train.txt", "model_file", lr=0.05, dim=100, ws=5, epoch=5)
101
- ```
102
- with
103
- ```
104
- model = fasttext.train_unsupervised("train.txt", model='cbow', lr=0.05, dim=100, ws=5, epoch=5)
105
- model.save_model("model_file.bin")
106
- ```
107
-
108
- #### `skipgram` function: use `train_unsupervised` instead.
109
- For example, replace:
110
-
111
- ```
112
- fasttext.skipgram("train.txt", "model_file", lr=0.05, dim=100, ws=5, epoch=5)
113
- ```
114
- with
115
- ```
116
- model = fasttext.train_unsupervised("train.txt", model='skipgram', lr=0.05, dim=100, ws=5, epoch=5)
117
- model.save_model("model_file.bin")
118
- ```
119
-
120
-
121
- #### `supervised` function: use `train_supervised` instead
122
- For example, replace:
123
- ```
124
- fasttext.supervised("train.txt", "model_file", lr=0.1, dim=100, epoch=5, word_ngrams=2, loss='softmax')
125
- ```
126
- with
127
- ```
128
- model = fasttext.train_supervised("train.txt", lr=0.1, dim=100, epoch=5, word_ngrams=2, loss='softmax')
129
- model.save_model("model_file.bin")
130
- ```
131
-
132
- #### Parameters
133
-
134
- - As you can see, you can use either `word_ngrams` or `wordNgrams` as the parameter name, because the parameter names from the unofficial API are mapped to the official ones: `min_count` to `minCount`, `word_ngrams` to `wordNgrams`, `lr_update_rate` to `lrUpdateRate`, `label_prefix` to `label` and `pretrained_vectors` to `pretrainedVectors`.
135
- - The `silent` parameter is not supported. Use the `verbose` parameter instead.
136
- - The `encoding` parameter is not supported; every input should be encoded in `utf-8`.
137
-
138
-
139
- ### `WordVectorModel` and `SupervisedModel` objects
140
-
141
- Instead of `WordVectorModel` and `SupervisedModel` objects, we return a model object that mimics some nice ideas from the unofficial API.
142
-
143
- ```python
144
- model = fasttext.train_unsupervised("train.txt", model='skipgram')
145
- print(model.words) # list of words in dictionary
146
- print(model['king']) # get the vector of the word 'king'
147
- print('king' in model) # check if a word is in dictionary
148
- ```
149
-
150
-
151
-
152
- ```python
153
- model = fasttext.train_supervised("train.txt")
154
- print(model.words) # list of words in dictionary
155
- print(model.labels) # list of labels
156
- ```
157
-
158
- The model object also contains the arguments of the training:
159
-
160
- ```python
161
- print(model.epoch)
162
- print(model.loss)
163
- print(model.wordNgrams)
164
- ```
165
-
166
- Thank you!
167
- ------------
168
- We want to thank our incredible community. We truly appreciate your feedback, and we send a big thank you to everyone reporting issues and contributing to the project. In particular, we want to express how grateful we are to [Bayu Aldi Yansyah](https://github.com/pyk), who did a great job with his python library and gave us ownership of the pypi `fasttext` project.
@@ -1,127 +0,0 @@
1
- /**
2
- * Copyright (c) 2017-present, Facebook, Inc.
3
- * All rights reserved.
4
- *
5
- * This source code is licensed under the MIT license found in the
6
- * LICENSE file in the root directory of this source tree.
7
- */
8
-
9
- const React = require("react");
10
-
11
- const githubButton = (
12
- <a
13
- className="github-button"
14
- href="https://github.com/facebookresearch/fastText/"
15
- data-icon="octicon-star"
16
- data-count-href="/fastText/stargazers"
17
- data-count-api="/repos/fastText#stargazers_count"
18
- data-count-aria-label="# stargazers on GitHub"
19
- aria-label="Star this project on GitHub"
20
- >
21
- Star
22
- </a>
23
- );
24
-
25
- class Footer extends React.Component {
26
- render() {
27
- const language = this.props.language || "en";
28
- const currentYear = new Date().getFullYear();
29
- return (
30
- <footer className="nav-footer" id="footer">
31
- <section className="sitemap">
32
- <a href={this.props.config.baseUrl} className="nav-home">
33
- <img
34
- src={this.props.config.baseUrl + this.props.config.footerIcon}
35
- alt={this.props.config.title}
36
- />
37
- </a>
38
- <div>
39
- <h5>Support</h5>
40
- <a
41
- href={
42
- this.props.config.baseUrl + "docs/" + language + "/support.html"
43
- }
44
- >
45
- Getting Started
46
- </a>
47
- <a
48
- href={
49
- this.props.config.baseUrl +
50
- "docs/" +
51
- language +
52
- "/supervised-tutorial.html"
53
- }
54
- >
55
- Tutorials
56
- </a>
57
- <a
58
- href={
59
- this.props.config.baseUrl +
60
- "docs/" +
61
- language +
62
- "/faqs.html"
63
- }
64
- >
65
- FAQs
66
- </a>
67
- <a
68
- href={
69
- this.props.config.baseUrl +
70
- "docs/" +
71
- language +
72
- "/api.html"
73
- }
74
- >
75
- API
76
- </a>
77
- </div>
78
- <div>
79
- <h5>Community</h5>
80
- <a
81
- href="https://www.facebook.com/groups/1174547215919768/"
82
- target="_blank"
83
- >
84
- Facebook Group
85
- </a>
86
- <a
87
- href="http://stackoverflow.com/questions/tagged/fasttext"
88
- target="_blank"
89
- >
90
- Stack Overflow
91
- </a>
92
- <a
93
- href="https://groups.google.com/forum/#!forum/fasttext-library"
94
- target="_blank"
95
- >
96
- Google Group
97
- </a>
98
- </div>
99
- <div>
100
- <h5>More</h5>
101
- <a href={this.props.config.baseUrl + "blog"}>Blog</a>
102
- <a href="https://github.com/facebookresearch/fastText" target="_blank">GitHub</a>
103
- {githubButton}
104
- </div>
105
- </section>
106
-
107
- <a
108
- href="https://code.facebook.com/projects/"
109
- target="_blank"
110
- className="fbOpenSource"
111
- >
112
- <img
113
- src={this.props.config.baseUrl + "img/oss_logo.png"}
114
- alt="Facebook Open Source"
115
- width="170"
116
- height="45"
117
- />
118
- </a>
119
- <section className="copyright">
120
- Copyright &copy; {currentYear} Facebook Inc.
121
- </section>
122
- </footer>
123
- );
124
- }
125
- }
126
-
127
- module.exports = Footer;