fasttext 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (478) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +20 -1
  4. data/lib/fasttext.rb +3 -0
  5. data/lib/fasttext/classifier.rb +12 -4
  6. data/lib/fasttext/vectorizer.rb +1 -1
  7. data/lib/fasttext/version.rb +1 -1
  8. metadata +4 -473
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/vendor/fastText/CMakeLists.txt +0 -68
  11. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  12. data/vendor/fastText/CONTRIBUTING.md +0 -32
  13. data/vendor/fastText/MANIFEST.in +0 -5
  14. data/vendor/fastText/Makefile +0 -63
  15. data/vendor/fastText/alignment/README.md +0 -53
  16. data/vendor/fastText/alignment/align.py +0 -145
  17. data/vendor/fastText/alignment/eval.py +0 -60
  18. data/vendor/fastText/alignment/example.sh +0 -51
  19. data/vendor/fastText/alignment/unsup_align.py +0 -109
  20. data/vendor/fastText/alignment/utils.py +0 -154
  21. data/vendor/fastText/classification-example.sh +0 -41
  22. data/vendor/fastText/classification-results.sh +0 -94
  23. data/vendor/fastText/crawl/README.md +0 -26
  24. data/vendor/fastText/crawl/dedup.cc +0 -51
  25. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  26. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  27. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  28. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  29. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  30. data/vendor/fastText/docs/api.md +0 -6
  31. data/vendor/fastText/docs/cheatsheet.md +0 -66
  32. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  33. data/vendor/fastText/docs/dataset.md +0 -6
  34. data/vendor/fastText/docs/english-vectors.md +0 -53
  35. data/vendor/fastText/docs/faqs.md +0 -63
  36. data/vendor/fastText/docs/language-identification.md +0 -47
  37. data/vendor/fastText/docs/options.md +0 -50
  38. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  39. data/vendor/fastText/docs/python-module.md +0 -314
  40. data/vendor/fastText/docs/references.md +0 -41
  41. data/vendor/fastText/docs/supervised-models.md +0 -54
  42. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  43. data/vendor/fastText/docs/support.md +0 -58
  44. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  45. data/vendor/fastText/eval.py +0 -95
  46. data/vendor/fastText/get-wikimedia.sh +0 -79
  47. data/vendor/fastText/python/README.md +0 -322
  48. data/vendor/fastText/python/README.rst +0 -406
  49. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  50. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  51. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  52. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  53. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  54. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  55. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  56. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  57. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  58. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  59. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  60. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  61. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  62. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  63. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  64. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  65. data/vendor/fastText/quantization-example.sh +0 -40
  66. data/vendor/fastText/runtests.py +0 -60
  67. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  68. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  69. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  70. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  71. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  72. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  73. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  74. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  75. data/vendor/fastText/setup.cfg +0 -2
  76. data/vendor/fastText/setup.py +0 -203
  77. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  78. data/vendor/fastText/website/README.md +0 -6
  79. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  80. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  81. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  82. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  83. data/vendor/fastText/website/core/Footer.js +0 -127
  84. data/vendor/fastText/website/package.json +0 -12
  85. data/vendor/fastText/website/pages/en/index.js +0 -286
  86. data/vendor/fastText/website/sidebars.json +0 -18
  87. data/vendor/fastText/website/siteConfig.js +0 -102
  88. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  89. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  90. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  91. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  92. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  93. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  94. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  95. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  96. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  97. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  98. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  99. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  100. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  101. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  102. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  103. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  104. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  105. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  106. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  107. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  108. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  109. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  110. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  111. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  112. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  113. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  114. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  115. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  116. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  121. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  122. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  123. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  124. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  125. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  126. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  127. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  128. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  129. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  130. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  131. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  132. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  133. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  134. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  135. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  136. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  137. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  138. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  139. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  140. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  141. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  142. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  143. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  144. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  145. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  146. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  147. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  148. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  149. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  150. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  151. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  152. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  153. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  154. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  155. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  156. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  157. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  158. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  159. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  160. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  161. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  162. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  163. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  164. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  165. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  166. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  167. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  168. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  169. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  170. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  171. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  172. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  173. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  174. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  175. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  176. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  177. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  178. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  179. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  180. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  181. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  182. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  183. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  184. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  185. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  186. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  187. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  188. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  189. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  190. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  191. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  192. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  193. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  194. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  195. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  196. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  197. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  198. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  199. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  200. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  201. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  202. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  203. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  204. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  205. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  206. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  207. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  208. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  209. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  210. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  211. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  212. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  213. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  214. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  215. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  216. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  217. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  218. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  219. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  220. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  221. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  222. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  223. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  224. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  225. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  226. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  227. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  228. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  229. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  230. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  232. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  233. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  237. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  241. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  243. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  245. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  247. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  249. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  251. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  253. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  255. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  257. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  259. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  261. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  263. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  265. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  267. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  269. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  271. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  273. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  275. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  277. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  279. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  281. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  282. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  283. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  284. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  285. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  286. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  287. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  288. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  289. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  290. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  291. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  292. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  293. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  294. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  295. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  296. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  297. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  298. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  299. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  300. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  302. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  304. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  308. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  310. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  312. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  314. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  316. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  318. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  322. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  324. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  326. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  332. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  338. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  342. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  372. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  374. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  375. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  376. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  377. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  378. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  382. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  384. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  386. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  388. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  390. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  392. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  393. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  394. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  395. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  396. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  397. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  398. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  399. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  400. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  401. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  402. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  404. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  426. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  427. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  428. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  429. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  430. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  431. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  432. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  433. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  434. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  435. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  436. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  437. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  438. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  439. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  440. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  441. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  442. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  443. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  444. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  445. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  446. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  447. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  448. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  449. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  450. data/vendor/fastText/website/static/fasttext.css +0 -48
  451. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  452. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  453. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  454. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  455. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  456. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  457. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  458. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  459. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  460. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  461. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  462. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  463. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  464. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  465. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  466. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  467. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  468. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  469. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  470. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  471. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  472. data/vendor/fastText/website/static/img/model-black.png +0 -0
  473. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  474. data/vendor/fastText/website/static/img/model-red.png +0 -0
  475. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  476. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  477. data/vendor/fastText/wikifil.pl +0 -57
  478. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for SVO
10
- DIR=data/SVO-tensor-dataset
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
- pushd $FASTTEXTDIR
15
- make opt
16
- popd
17
- ft=${FASTTEXTDIR}/fasttext
18
-
19
- ## Train model and test it on validation:
20
-
21
- dim=200
22
- epoch=3
23
- model=svo
24
-
25
- echo "---- train ----"
26
- time $ft supervised -input ${DIR}/ft_svo_data_train_1000000.dat \
27
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
28
-
29
- echo "computing raw hit@5%..."
30
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
31
-
32
-
33
- echo "---- train + valid ----"
34
- time $ft supervised -input ${DIR}/ft_svo_data-valid+train.dat \
35
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
36
-
37
- echo "computing raw hit@5%..."
38
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for WN11
10
- DIR=data/wordnet-mlj12/
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
-
15
- pushd $FASTTEXTDIR
16
- make opt
17
- popd
18
- ft=${FASTTEXTDIR}/fasttext
19
-
20
- g++ -std=c++0x eval.cpp -o eval
21
-
22
- # Train model and test it:
23
- dim=100
24
- epoch=100
25
- neg=500
26
- model=data/wn
27
- pred=data/wnpred
28
-
29
- echo "---- train ----"
30
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-train.txt \
31
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
32
-
33
- echo "computing raw hits@10..."
34
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
35
-
36
- echo "computing filtered hit@10..."
37
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
38
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
39
-
40
- echo "---- train+val ----"
41
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-valid+train.txt \
42
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
43
-
44
- echo "computing raw hits@10..."
45
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
46
-
47
- echo "computing filtered hit@10..."
48
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
49
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- # This script applies quantization to the models from Table 1 in:
11
- # Bag of Tricks for Efficient Text Classification, arXiv 1607.01759, 2016
12
-
13
- set -e
14
-
15
- DATASET=(
16
- ag_news
17
- sogou_news
18
- dbpedia
19
- yelp_review_polarity
20
- yelp_review_full
21
- yahoo_answers
22
- amazon_review_full
23
- amazon_review_polarity
24
- )
25
-
26
- # These learning rates were chosen by validation on a subset of the training set.
27
- LR=( 0.25 0.5 0.5 0.1 0.1 0.1 0.05 0.05 )
28
-
29
- RESULTDIR=result
30
- DATADIR=data
31
-
32
- echo 'Warning! Make sure you run the classification-results.sh script before this one'
33
- echo 'Otherwise you can expect the commands in this script to fail'
34
-
35
- for i in {0..7}
36
- do
37
- echo "Working on dataset ${DATASET[i]}"
38
- ../../fasttext quantize -input "${DATADIR}/${DATASET[i]}.train" \
39
- -output "${RESULTDIR}/${DATASET[i]}" -lr "${LR[i]}" \
40
- -thread 4 -qnorm -retrain -epoch 5 -cutoff 100000 > /dev/null
41
- ../../fasttext test "${RESULTDIR}/${DATASET[i]}.ftz" \
42
- "${DATADIR}/${DATASET[i]}.test"
43
- done
@@ -1,2 +0,0 @@
1
- [metadata]
2
- description-file = README.md
@@ -1,203 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- # Copyright (c) 2017-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- from __future__ import absolute_import
11
- from __future__ import division
12
- from __future__ import print_function
13
- from __future__ import unicode_literals
14
-
15
- from setuptools import setup, Extension
16
- from setuptools.command.build_ext import build_ext
17
- import sys
18
- import setuptools
19
- import os
20
- import subprocess
21
- import platform
22
- import io
23
-
24
- __version__ = '0.9.1'
25
- FASTTEXT_SRC = "src"
26
-
27
- # Based on https://github.com/pybind/python_example
28
-
29
- class get_pybind_include(object):
30
- """Helper class to determine the pybind11 include path
31
-
32
- The purpose of this class is to postpone importing pybind11
33
- until it is actually installed, so that the ``get_include()``
34
- method can be invoked. """
35
-
36
- def __init__(self, user=False):
37
- try:
38
- import pybind11
39
- except ImportError:
40
- if subprocess.call([sys.executable, '-m', 'pip', 'install', 'pybind11']):
41
- raise RuntimeError('pybind11 install failed.')
42
-
43
- self.user = user
44
-
45
- def __str__(self):
46
- import pybind11
47
- return pybind11.get_include(self.user)
48
-
49
- try:
50
- coverage_index = sys.argv.index('--coverage')
51
- except ValueError:
52
- coverage = False
53
- else:
54
- del sys.argv[coverage_index]
55
- coverage = True
56
-
57
- fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
58
- fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))
59
-
60
- fasttext_src_cc = list(
61
- map(lambda x: str(os.path.join(FASTTEXT_SRC, x)), fasttext_src_cc)
62
- )
63
-
64
- ext_modules = [
65
- Extension(
66
- str('fasttext_pybind'),
67
- [
68
- str('python/fasttext_module/fasttext/pybind/fasttext_pybind.cc'),
69
- ] + fasttext_src_cc,
70
- include_dirs=[
71
- # Path to pybind11 headers
72
- get_pybind_include(),
73
- get_pybind_include(user=True),
74
- # Path to fasttext source code
75
- FASTTEXT_SRC,
76
- ],
77
- language='c++',
78
- extra_compile_args=["-O0 -fno-inline -fprofile-arcs -pthread -march=native" if coverage else
79
- "-O3 -funroll-loops -pthread -march=native"],
80
- ),
81
- ]
82
-
83
-
84
- # As of Python 3.6, CCompiler has a `has_flag` method.
85
- # cf http://bugs.python.org/issue26689
86
- def has_flag(compiler, flags):
87
- """Return a boolean indicating whether a flag name is supported on
88
- the specified compiler.
89
- """
90
- import tempfile
91
- with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
92
- f.write('int main (int argc, char **argv) { return 0; }')
93
- try:
94
- compiler.compile([f.name], extra_postargs=flags)
95
- except setuptools.distutils.errors.CompileError:
96
- return False
97
- return True
98
-
99
-
100
- def cpp_flag(compiler):
101
- """Return the -std=c++[0x/11/14] compiler flag.
102
- The c++14 is preferred over c++0x/11 (when it is available).
103
- """
104
- standards = ['-std=c++14', '-std=c++11', '-std=c++0x']
105
- for standard in standards:
106
- if has_flag(compiler, [standard]):
107
- return standard
108
- raise RuntimeError(
109
- 'Unsupported compiler -- at least C++0x support '
110
- 'is needed!'
111
- )
112
-
113
-
114
- class BuildExt(build_ext):
115
- """A custom build extension for adding compiler-specific options."""
116
- c_opts = {
117
- 'msvc': ['/EHsc'],
118
- 'unix': [],
119
- }
120
-
121
- def build_extensions(self):
122
- if sys.platform == 'darwin':
123
- mac_osx_version = float('.'.join(platform.mac_ver()[0].split('.')[:2]))
124
- os.environ['MACOSX_DEPLOYMENT_TARGET'] = str(mac_osx_version)
125
- all_flags = ['-stdlib=libc++', '-mmacosx-version-min=10.7']
126
- if has_flag(self.compiler, [all_flags[0]]):
127
- self.c_opts['unix'] += [all_flags[0]]
128
- elif has_flag(self.compiler, all_flags):
129
- self.c_opts['unix'] += all_flags
130
- else:
131
- raise RuntimeError(
132
- 'libc++ is needed! Failed to compile with {} and {}.'.
133
- format(" ".join(all_flags), all_flags[0])
134
- )
135
- ct = self.compiler.compiler_type
136
- opts = self.c_opts.get(ct, [])
137
- extra_link_args = []
138
-
139
- if coverage:
140
- coverage_option = '--coverage'
141
- opts.append(coverage_option)
142
- extra_link_args.append(coverage_option)
143
-
144
- if ct == 'unix':
145
- opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
146
- opts.append(cpp_flag(self.compiler))
147
- if has_flag(self.compiler, ['-fvisibility=hidden']):
148
- opts.append('-fvisibility=hidden')
149
- elif ct == 'msvc':
150
- opts.append(
151
- '/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
152
- )
153
- for ext in self.extensions:
154
- ext.extra_compile_args = opts
155
- ext.extra_link_args = extra_link_args
156
- build_ext.build_extensions(self)
157
-
158
-
159
- def _get_readme():
160
- """
161
- Use pandoc to generate rst from md.
162
- pandoc --from=markdown --to=rst --output=python/README.rst python/README.md
163
- """
164
- with io.open("python/README.rst", encoding='utf-8') as fid:
165
- return fid.read()
166
-
167
-
168
- setup(
169
- name='fasttext',
170
- version=__version__,
171
- author='Onur Celebi',
172
- author_email='celebio@fb.com',
173
- description='fasttext Python bindings',
174
- long_description=_get_readme(),
175
- ext_modules=ext_modules,
176
- url='https://github.com/facebookresearch/fastText',
177
- license='MIT',
178
- classifiers=[
179
- 'Development Status :: 3 - Alpha',
180
- 'Intended Audience :: Developers',
181
- 'Intended Audience :: Science/Research',
182
- 'License :: OSI Approved :: MIT License',
183
- 'Programming Language :: Python :: 2.7',
184
- 'Programming Language :: Python :: 3.4',
185
- 'Programming Language :: Python :: 3.5',
186
- 'Programming Language :: Python :: 3.6',
187
- 'Topic :: Software Development',
188
- 'Topic :: Scientific/Engineering',
189
- 'Operating System :: Microsoft :: Windows',
190
- 'Operating System :: POSIX',
191
- 'Operating System :: Unix',
192
- 'Operating System :: MacOS',
193
- ],
194
- install_requires=['pybind11>=2.2', "setuptools >= 0.7.0", "numpy"],
195
- cmdclass={'build_ext': BuildExt},
196
- packages=[
197
- str('fasttext'),
198
- str('fasttext.util'),
199
- str('fasttext.tests'),
200
- ],
201
- package_dir={str(''): str('python/fasttext_module')},
202
- zip_safe=False,
203
- )
@@ -1,202 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- DATADIR=${DATADIR:-data}
11
-
12
- report_error() {
13
- echo "Error on line $1 of $0"
14
- }
15
-
16
- myshuf() {
17
- perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
18
- }
19
-
20
- normalize_text() {
21
- tr '[:upper:]' '[:lower:]' | sed -e 's/^/__label__/g' | \
22
- sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
23
- -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
24
- -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf
25
- }
26
-
27
- set -e
28
- trap 'report_error $LINENO' ERR
29
-
30
- mkdir -p "${DATADIR}"
31
-
32
-
33
- # Unsupervised datasets
34
-
35
- data_result="${DATADIR}/rw_queries.txt"
36
- if [ ! -f "$data_result" ]
37
- then
38
- cut -f 1,2 "${DATADIR}"/rw/rw.txt | awk '{print tolower($0)}' | tr '\t' '\n' > "$data_result" || rm -f "$data_result"
39
- fi
40
-
41
- data_result="${DATADIR}/enwik9.zip"
42
- if [ ! -f "$data_result" ] || \
43
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "3e773f8a1577fda2e27f871ca17f31fd" ]
44
- then
45
- wget -c http://mattmahoney.net/dc/enwik9.zip -P "${DATADIR}" || rm -f "$data_result"
46
- unzip "$data_result" -d "${DATADIR}" || rm -f "$data_result"
47
- fi
48
-
49
- data_result="${DATADIR}/fil9"
50
- if [ ! -f "$data_result" ]
51
- then
52
- perl wikifil.pl "${DATADIR}/enwik9" > "$data_result" || rm -f "$data_result"
53
- fi
54
-
55
- data_result="${DATADIR}/rw/rw.txt"
56
- if [ ! -f "$data_result" ]
57
- then
58
- wget -c https://nlp.stanford.edu/~lmthang/morphoNLM/rw.zip -P "${DATADIR}"
59
- unzip "${DATADIR}/rw.zip" -d "${DATADIR}" || rm -f "$data_result"
60
- fi
61
-
62
- # Supervised datasets
63
- # Each datasets comes with a .train and a .test to measure performance
64
-
65
- echo "Downloading dataset dbpedia"
66
-
67
- data_result="${DATADIR}/dbpedia_csv.tar.gz"
68
- if [ ! -f "$data_result" ] || \
69
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "8139d58cf075c7f70d085358e73af9b3" ]
70
- then
71
- wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "$data_result"
72
- tar -xzvf "$data_result" -C "${DATADIR}"
73
- fi
74
-
75
- data_result="${DATADIR}/dbpedia.train"
76
- if [ ! -f "$data_result" ]
77
- then
78
- cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "$data_result" || rm -f "$data_result"
79
- fi
80
-
81
- data_result="${DATADIR}/dbpedia.test"
82
- if [ ! -f "$data_result" ]
83
- then
84
- cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "$data_result" || rm -f "$data_result"
85
- fi
86
-
87
- echo "Downloading dataset tatoeba for langid"
88
-
89
- data_result="${DATADIR}"/langid/all.txt
90
- if [ ! -f "$data_result" ]
91
- then
92
- mkdir -p "${DATADIR}"/langid
93
- wget http://downloads.tatoeba.org/exports/sentences.tar.bz2 -O "${DATADIR}"/langid/sentences.tar.bz2
94
- tar xvfj "${DATADIR}"/langid/sentences.tar.bz2 --directory "${DATADIR}"/langid || exit 1
95
- awk -F"\t" '{print"__label__"$2" "$3}' < "${DATADIR}"/langid/sentences.csv | shuf > "$data_result"
96
- fi
97
-
98
- data_result="${DATADIR}/langid.train"
99
- if [ ! -f "$data_result" ]
100
- then
101
- tail -n +10001 "${DATADIR}"/langid/all.txt > "$data_result"
102
- fi
103
-
104
- data_result="${DATADIR}/langid.valid"
105
- if [ ! -f "$data_result" ]
106
- then
107
- head -n 10000 "${DATADIR}"/langid/all.txt > "$data_result"
108
- fi
109
-
110
- echo "Downloading cooking dataset"
111
-
112
- data_result="${DATADIR}"/cooking/cooking.stackexchange.txt
113
- if [ ! -f "$data_result" ]
114
- then
115
- mkdir -p "${DATADIR}"/cooking/
116
- wget https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz -O "${DATADIR}"/cooking/cooking.stackexchange.tar.gz
117
- tar xvzf "${DATADIR}"/cooking/cooking.stackexchange.tar.gz --directory "${DATADIR}"/cooking || exit 1
118
- cat "${DATADIR}"/cooking/cooking.stackexchange.txt | sed -e "s/\([.\!?,'/()]\)/ \1 /g" | tr "[:upper:]" "[:lower:]" > "${DATADIR}"/cooking/cooking.preprocessed.txt
119
- fi
120
-
121
- data_result="${DATADIR}"/cooking.train
122
- if [ ! -f "$data_result" ]
123
- then
124
- head -n 12404 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.train
125
- fi
126
-
127
- data_result="${DATADIR}"/cooking.valid
128
- if [ ! -f "$data_result" ]
129
- then
130
- tail -n 3000 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.valid
131
- fi
132
-
133
- echo "Checking for YFCC100M"
134
-
135
- data_result="${DATADIR}"/YFCC100M/train
136
- if [ ! -f "$data_result" ]
137
- then
138
- echo 'Download YFCC100M, unpack it and place train into the following path: '"$data_result"
139
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
140
- echo 'After you download this, run the script again'
141
- exit 1
142
- fi
143
-
144
- data_result="${DATADIR}"/YFCC100M/test
145
- if [ ! -f "$data_result" ]
146
- then
147
- echo 'Download YFCC100M, unpack it and place test into the following path: '"$data_result"
148
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
149
- echo 'After you download this, run the script again'
150
- exit 1
151
- fi
152
-
153
- DATASET=(
154
- ag_news
155
- sogou_news
156
- dbpedia
157
- yelp_review_polarity
158
- yelp_review_full
159
- yahoo_answers
160
- amazon_review_full
161
- amazon_review_polarity
162
- )
163
-
164
- ID=(
165
- 0Bz8a_Dbh9QhbUDNpeUdjb0wxRms # ag_news
166
- 0Bz8a_Dbh9QhbUkVqNEszd0pHaFE # sogou_news
167
- 0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k # dbpedia
168
- 0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg # yelp_review_polarity
169
- 0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0 # yelp_review_full
170
- 0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU # yahoo_answers
171
- 0Bz8a_Dbh9QhbZVhsUnRWRDhETzA # amazon_review_full
172
- 0Bz8a_Dbh9QhbaW12WVVZS2drcnM # amazon_review_polarity
173
- )
174
-
175
- # Small datasets first
176
-
177
- for i in {0..0}
178
- do
179
- echo "Downloading dataset ${DATASET[i]}"
180
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
181
- then
182
- wget -c "https://drive.google.com/uc?export=download&id=${ID[i]}" -O "${DATADIR}/${DATASET[i]}_csv.tar.gz"
183
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
184
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
185
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
186
- fi
187
- done
188
-
189
- # Large datasets require a bit more work due to the extra request page
190
-
191
- for i in {1..7}
192
- do
193
- echo "Downloading dataset ${DATASET[i]}"
194
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
195
- then
196
- curl -c /tmp/cookies "https://drive.google.com/uc?export=download&id=${ID[i]}" > /tmp/intermezzo.html
197
- curl -L -b /tmp/cookies "https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link" [^>]* href="\K[^"]*' | sed 's/\&amp;/\&/g')" > "${DATADIR}/${DATASET[i]}_csv.tar.gz"
198
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
199
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
200
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
201
- fi
202
- done