fasttext 0.1.0 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (478)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +20 -1
  4. data/lib/fasttext.rb +3 -0
  5. data/lib/fasttext/classifier.rb +12 -4
  6. data/lib/fasttext/vectorizer.rb +1 -1
  7. data/lib/fasttext/version.rb +1 -1
  8. metadata +4 -473
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/vendor/fastText/CMakeLists.txt +0 -68
  11. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  12. data/vendor/fastText/CONTRIBUTING.md +0 -32
  13. data/vendor/fastText/MANIFEST.in +0 -5
  14. data/vendor/fastText/Makefile +0 -63
  15. data/vendor/fastText/alignment/README.md +0 -53
  16. data/vendor/fastText/alignment/align.py +0 -145
  17. data/vendor/fastText/alignment/eval.py +0 -60
  18. data/vendor/fastText/alignment/example.sh +0 -51
  19. data/vendor/fastText/alignment/unsup_align.py +0 -109
  20. data/vendor/fastText/alignment/utils.py +0 -154
  21. data/vendor/fastText/classification-example.sh +0 -41
  22. data/vendor/fastText/classification-results.sh +0 -94
  23. data/vendor/fastText/crawl/README.md +0 -26
  24. data/vendor/fastText/crawl/dedup.cc +0 -51
  25. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  26. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  27. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  28. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  29. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  30. data/vendor/fastText/docs/api.md +0 -6
  31. data/vendor/fastText/docs/cheatsheet.md +0 -66
  32. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  33. data/vendor/fastText/docs/dataset.md +0 -6
  34. data/vendor/fastText/docs/english-vectors.md +0 -53
  35. data/vendor/fastText/docs/faqs.md +0 -63
  36. data/vendor/fastText/docs/language-identification.md +0 -47
  37. data/vendor/fastText/docs/options.md +0 -50
  38. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  39. data/vendor/fastText/docs/python-module.md +0 -314
  40. data/vendor/fastText/docs/references.md +0 -41
  41. data/vendor/fastText/docs/supervised-models.md +0 -54
  42. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  43. data/vendor/fastText/docs/support.md +0 -58
  44. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  45. data/vendor/fastText/eval.py +0 -95
  46. data/vendor/fastText/get-wikimedia.sh +0 -79
  47. data/vendor/fastText/python/README.md +0 -322
  48. data/vendor/fastText/python/README.rst +0 -406
  49. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  50. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  51. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  52. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  53. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  54. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  55. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  56. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  57. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  58. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  59. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  60. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  61. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  62. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  63. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  64. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  65. data/vendor/fastText/quantization-example.sh +0 -40
  66. data/vendor/fastText/runtests.py +0 -60
  67. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  68. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  69. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  70. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  71. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  72. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  73. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  74. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  75. data/vendor/fastText/setup.cfg +0 -2
  76. data/vendor/fastText/setup.py +0 -203
  77. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  78. data/vendor/fastText/website/README.md +0 -6
  79. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  80. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  81. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  82. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  83. data/vendor/fastText/website/core/Footer.js +0 -127
  84. data/vendor/fastText/website/package.json +0 -12
  85. data/vendor/fastText/website/pages/en/index.js +0 -286
  86. data/vendor/fastText/website/sidebars.json +0 -18
  87. data/vendor/fastText/website/siteConfig.js +0 -102
  88. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  89. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  90. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  91. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  92. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  93. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  94. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  95. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  96. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  97. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  98. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  99. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  100. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  101. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  102. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  103. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  104. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  105. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  106. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  107. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  108. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  109. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  110. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  111. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  112. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  113. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  114. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  115. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  116. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  121. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  122. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  123. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  124. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  125. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  126. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  127. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  128. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  129. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  130. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  131. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  132. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  133. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  134. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  135. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  136. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  137. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  138. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  139. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  140. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  141. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  142. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  143. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  144. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  145. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  146. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  147. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  148. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  149. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  150. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  151. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  152. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  153. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  154. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  155. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  156. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  157. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  158. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  159. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  160. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  161. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  162. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  163. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  164. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  165. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  166. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  167. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  168. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  169. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  170. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  171. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  172. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  173. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  174. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  175. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  176. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  177. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  178. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  179. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  180. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  181. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  182. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  183. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  184. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  185. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  186. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  187. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  188. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  189. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  190. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  191. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  192. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  193. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  194. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  195. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  196. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  197. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  198. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  199. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  200. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  201. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  202. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  203. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  204. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  205. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  206. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  207. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  208. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  209. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  210. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  211. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  212. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  213. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  214. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  215. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  216. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  217. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  218. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  219. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  220. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  221. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  222. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  223. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  224. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  225. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  226. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  227. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  228. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  229. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  230. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  232. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  233. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  237. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  241. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  243. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  245. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  247. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  249. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  251. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  253. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  255. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  257. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  259. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  261. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  263. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  265. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  267. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  269. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  271. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  273. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  275. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  277. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  279. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  281. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  282. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  283. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  284. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  285. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  286. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  287. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  288. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  289. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  290. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  291. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  292. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  293. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  294. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  295. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  296. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  297. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  298. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  299. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  300. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  302. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  304. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  308. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  310. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  312. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  314. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  316. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  318. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  322. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  324. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  326. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  332. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  338. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  342. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  372. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  374. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  375. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  376. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  377. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  378. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  382. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  384. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  386. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  388. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  390. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  392. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  393. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  394. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  395. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  396. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  397. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  398. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  399. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  400. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  401. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  402. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  404. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  426. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  427. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  428. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  429. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  430. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  431. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  432. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  433. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  434. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  435. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  436. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  437. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  438. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  439. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  440. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  441. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  442. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  443. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  444. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  445. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  446. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  447. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  448. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  449. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  450. data/vendor/fastText/website/static/fasttext.css +0 -48
  451. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  452. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  453. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  454. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  455. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  456. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  457. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  458. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  459. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  460. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  461. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  462. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  463. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  464. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  465. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  466. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  467. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  468. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  469. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  470. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  471. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  472. data/vendor/fastText/website/static/img/model-black.png +0 -0
  473. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  474. data/vendor/fastText/website/static/img/model-red.png +0 -0
  475. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  476. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  477. data/vendor/fastText/wikifil.pl +0 -57
  478. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for SVO
10
- DIR=data/SVO-tensor-dataset
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
- pushd $FASTTEXTDIR
15
- make opt
16
- popd
17
- ft=${FASTTEXTDIR}/fasttext
18
-
19
- ## Train model and test it on validation:
20
-
21
- dim=200
22
- epoch=3
23
- model=svo
24
-
25
- echo "---- train ----"
26
- time $ft supervised -input ${DIR}/ft_svo_data_train_1000000.dat \
27
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
28
-
29
- echo "computing raw hit@5%..."
30
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
31
-
32
-
33
- echo "---- train + valid ----"
34
- time $ft supervised -input ${DIR}/ft_svo_data-valid+train.dat \
35
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
36
-
37
- echo "computing raw hit@5%..."
38
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for WN11
10
- DIR=data/wordnet-mlj12/
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
-
15
- pushd $FASTTEXTDIR
16
- make opt
17
- popd
18
- ft=${FASTTEXTDIR}/fasttext
19
-
20
- g++ -std=c++0x eval.cpp -o eval
21
-
22
- # Train model and test it:
23
- dim=100
24
- epoch=100
25
- neg=500
26
- model=data/wn
27
- pred=data/wnpred
28
-
29
- echo "---- train ----"
30
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-train.txt \
31
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
32
-
33
- echo "computing raw hits@10..."
34
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
35
-
36
- echo "computing filtered hit@10..."
37
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
38
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
39
-
40
- echo "---- train+val ----"
41
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-valid+train.txt \
42
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
43
-
44
- echo "computing raw hits@10..."
45
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
46
-
47
- echo "computing filtered hit@10..."
48
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
49
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- # This script applies quantization to the models from Table 1 in:
11
- # Bag of Tricks for Efficient Text Classification, arXiv 1607.01759, 2016
12
-
13
- set -e
14
-
15
- DATASET=(
16
- ag_news
17
- sogou_news
18
- dbpedia
19
- yelp_review_polarity
20
- yelp_review_full
21
- yahoo_answers
22
- amazon_review_full
23
- amazon_review_polarity
24
- )
25
-
26
- # These learning rates were chosen by validation on a subset of the training set.
27
- LR=( 0.25 0.5 0.5 0.1 0.1 0.1 0.05 0.05 )
28
-
29
- RESULTDIR=result
30
- DATADIR=data
31
-
32
- echo 'Warning! Make sure you run the classification-results.sh script before this one'
33
- echo 'Otherwise you can expect the commands in this script to fail'
34
-
35
- for i in {0..7}
36
- do
37
- echo "Working on dataset ${DATASET[i]}"
38
- ../../fasttext quantize -input "${DATADIR}/${DATASET[i]}.train" \
39
- -output "${RESULTDIR}/${DATASET[i]}" -lr "${LR[i]}" \
40
- -thread 4 -qnorm -retrain -epoch 5 -cutoff 100000 > /dev/null
41
- ../../fasttext test "${RESULTDIR}/${DATASET[i]}.ftz" \
42
- "${DATADIR}/${DATASET[i]}.test"
43
- done
@@ -1,2 +0,0 @@
1
- [metadata]
2
- description-file = README.md
@@ -1,203 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- # Copyright (c) 2017-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- from __future__ import absolute_import
11
- from __future__ import division
12
- from __future__ import print_function
13
- from __future__ import unicode_literals
14
-
15
- from setuptools import setup, Extension
16
- from setuptools.command.build_ext import build_ext
17
- import sys
18
- import setuptools
19
- import os
20
- import subprocess
21
- import platform
22
- import io
23
-
24
- __version__ = '0.9.1'
25
- FASTTEXT_SRC = "src"
26
-
27
- # Based on https://github.com/pybind/python_example
28
-
29
- class get_pybind_include(object):
30
- """Helper class to determine the pybind11 include path
31
-
32
- The purpose of this class is to postpone importing pybind11
33
- until it is actually installed, so that the ``get_include()``
34
- method can be invoked. """
35
-
36
- def __init__(self, user=False):
37
- try:
38
- import pybind11
39
- except ImportError:
40
- if subprocess.call([sys.executable, '-m', 'pip', 'install', 'pybind11']):
41
- raise RuntimeError('pybind11 install failed.')
42
-
43
- self.user = user
44
-
45
- def __str__(self):
46
- import pybind11
47
- return pybind11.get_include(self.user)
48
-
49
- try:
50
- coverage_index = sys.argv.index('--coverage')
51
- except ValueError:
52
- coverage = False
53
- else:
54
- del sys.argv[coverage_index]
55
- coverage = True
56
-
57
- fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
58
- fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))
59
-
60
- fasttext_src_cc = list(
61
- map(lambda x: str(os.path.join(FASTTEXT_SRC, x)), fasttext_src_cc)
62
- )
63
-
64
- ext_modules = [
65
- Extension(
66
- str('fasttext_pybind'),
67
- [
68
- str('python/fasttext_module/fasttext/pybind/fasttext_pybind.cc'),
69
- ] + fasttext_src_cc,
70
- include_dirs=[
71
- # Path to pybind11 headers
72
- get_pybind_include(),
73
- get_pybind_include(user=True),
74
- # Path to fasttext source code
75
- FASTTEXT_SRC,
76
- ],
77
- language='c++',
78
- extra_compile_args=["-O0 -fno-inline -fprofile-arcs -pthread -march=native" if coverage else
79
- "-O3 -funroll-loops -pthread -march=native"],
80
- ),
81
- ]
82
-
83
-
84
- # As of Python 3.6, CCompiler has a `has_flag` method.
85
- # cf http://bugs.python.org/issue26689
86
- def has_flag(compiler, flags):
87
- """Return a boolean indicating whether a flag name is supported on
88
- the specified compiler.
89
- """
90
- import tempfile
91
- with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
92
- f.write('int main (int argc, char **argv) { return 0; }')
93
- try:
94
- compiler.compile([f.name], extra_postargs=flags)
95
- except setuptools.distutils.errors.CompileError:
96
- return False
97
- return True
98
-
99
-
100
- def cpp_flag(compiler):
101
- """Return the -std=c++[0x/11/14] compiler flag.
102
- The c++14 is preferred over c++0x/11 (when it is available).
103
- """
104
- standards = ['-std=c++14', '-std=c++11', '-std=c++0x']
105
- for standard in standards:
106
- if has_flag(compiler, [standard]):
107
- return standard
108
- raise RuntimeError(
109
- 'Unsupported compiler -- at least C++0x support '
110
- 'is needed!'
111
- )
112
-
113
-
114
- class BuildExt(build_ext):
115
- """A custom build extension for adding compiler-specific options."""
116
- c_opts = {
117
- 'msvc': ['/EHsc'],
118
- 'unix': [],
119
- }
120
-
121
- def build_extensions(self):
122
- if sys.platform == 'darwin':
123
- mac_osx_version = float('.'.join(platform.mac_ver()[0].split('.')[:2]))
124
- os.environ['MACOSX_DEPLOYMENT_TARGET'] = str(mac_osx_version)
125
- all_flags = ['-stdlib=libc++', '-mmacosx-version-min=10.7']
126
- if has_flag(self.compiler, [all_flags[0]]):
127
- self.c_opts['unix'] += [all_flags[0]]
128
- elif has_flag(self.compiler, all_flags):
129
- self.c_opts['unix'] += all_flags
130
- else:
131
- raise RuntimeError(
132
- 'libc++ is needed! Failed to compile with {} and {}.'.
133
- format(" ".join(all_flags), all_flags[0])
134
- )
135
- ct = self.compiler.compiler_type
136
- opts = self.c_opts.get(ct, [])
137
- extra_link_args = []
138
-
139
- if coverage:
140
- coverage_option = '--coverage'
141
- opts.append(coverage_option)
142
- extra_link_args.append(coverage_option)
143
-
144
- if ct == 'unix':
145
- opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
146
- opts.append(cpp_flag(self.compiler))
147
- if has_flag(self.compiler, ['-fvisibility=hidden']):
148
- opts.append('-fvisibility=hidden')
149
- elif ct == 'msvc':
150
- opts.append(
151
- '/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
152
- )
153
- for ext in self.extensions:
154
- ext.extra_compile_args = opts
155
- ext.extra_link_args = extra_link_args
156
- build_ext.build_extensions(self)
157
-
158
-
159
- def _get_readme():
160
- """
161
- Use pandoc to generate rst from md.
162
- pandoc --from=markdown --to=rst --output=python/README.rst python/README.md
163
- """
164
- with io.open("python/README.rst", encoding='utf-8') as fid:
165
- return fid.read()
166
-
167
-
168
- setup(
169
- name='fasttext',
170
- version=__version__,
171
- author='Onur Celebi',
172
- author_email='celebio@fb.com',
173
- description='fasttext Python bindings',
174
- long_description=_get_readme(),
175
- ext_modules=ext_modules,
176
- url='https://github.com/facebookresearch/fastText',
177
- license='MIT',
178
- classifiers=[
179
- 'Development Status :: 3 - Alpha',
180
- 'Intended Audience :: Developers',
181
- 'Intended Audience :: Science/Research',
182
- 'License :: OSI Approved :: MIT License',
183
- 'Programming Language :: Python :: 2.7',
184
- 'Programming Language :: Python :: 3.4',
185
- 'Programming Language :: Python :: 3.5',
186
- 'Programming Language :: Python :: 3.6',
187
- 'Topic :: Software Development',
188
- 'Topic :: Scientific/Engineering',
189
- 'Operating System :: Microsoft :: Windows',
190
- 'Operating System :: POSIX',
191
- 'Operating System :: Unix',
192
- 'Operating System :: MacOS',
193
- ],
194
- install_requires=['pybind11>=2.2', "setuptools >= 0.7.0", "numpy"],
195
- cmdclass={'build_ext': BuildExt},
196
- packages=[
197
- str('fasttext'),
198
- str('fasttext.util'),
199
- str('fasttext.tests'),
200
- ],
201
- package_dir={str(''): str('python/fasttext_module')},
202
- zip_safe=False,
203
- )
@@ -1,202 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- DATADIR=${DATADIR:-data}
11
-
12
- report_error() {
13
- echo "Error on line $1 of $0"
14
- }
15
-
16
- myshuf() {
17
- perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
18
- }
19
-
20
- normalize_text() {
21
- tr '[:upper:]' '[:lower:]' | sed -e 's/^/__label__/g' | \
22
- sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
23
- -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
24
- -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf
25
- }
26
-
27
- set -e
28
- trap 'report_error $LINENO' ERR
29
-
30
- mkdir -p "${DATADIR}"
31
-
32
-
33
- # Unsupervised datasets
34
-
35
- data_result="${DATADIR}/rw_queries.txt"
36
- if [ ! -f "$data_result" ]
37
- then
38
- cut -f 1,2 "${DATADIR}"/rw/rw.txt | awk '{print tolower($0)}' | tr '\t' '\n' > "$data_result" || rm -f "$data_result"
39
- fi
40
-
41
- data_result="${DATADIR}/enwik9.zip"
42
- if [ ! -f "$data_result" ] || \
43
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "3e773f8a1577fda2e27f871ca17f31fd" ]
44
- then
45
- wget -c http://mattmahoney.net/dc/enwik9.zip -P "${DATADIR}" || rm -f "$data_result"
46
- unzip "$data_result" -d "${DATADIR}" || rm -f "$data_result"
47
- fi
48
-
49
- data_result="${DATADIR}/fil9"
50
- if [ ! -f "$data_result" ]
51
- then
52
- perl wikifil.pl "${DATADIR}/enwik9" > "$data_result" || rm -f "$data_result"
53
- fi
54
-
55
- data_result="${DATADIR}/rw/rw.txt"
56
- if [ ! -f "$data_result" ]
57
- then
58
- wget -c https://nlp.stanford.edu/~lmthang/morphoNLM/rw.zip -P "${DATADIR}"
59
- unzip "${DATADIR}/rw.zip" -d "${DATADIR}" || rm -f "$data_result"
60
- fi
61
-
62
- # Supervised datasets
63
- # Each datasets comes with a .train and a .test to measure performance
64
-
65
- echo "Downloading dataset dbpedia"
66
-
67
- data_result="${DATADIR}/dbpedia_csv.tar.gz"
68
- if [ ! -f "$data_result" ] || \
69
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "8139d58cf075c7f70d085358e73af9b3" ]
70
- then
71
- wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "$data_result"
72
- tar -xzvf "$data_result" -C "${DATADIR}"
73
- fi
74
-
75
- data_result="${DATADIR}/dbpedia.train"
76
- if [ ! -f "$data_result" ]
77
- then
78
- cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "$data_result" || rm -f "$data_result"
79
- fi
80
-
81
- data_result="${DATADIR}/dbpedia.test"
82
- if [ ! -f "$data_result" ]
83
- then
84
- cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "$data_result" || rm -f "$data_result"
85
- fi
86
-
87
- echo "Downloading dataset tatoeba for langid"
88
-
89
- data_result="${DATADIR}"/langid/all.txt
90
- if [ ! -f "$data_result" ]
91
- then
92
- mkdir -p "${DATADIR}"/langid
93
- wget http://downloads.tatoeba.org/exports/sentences.tar.bz2 -O "${DATADIR}"/langid/sentences.tar.bz2
94
- tar xvfj "${DATADIR}"/langid/sentences.tar.bz2 --directory "${DATADIR}"/langid || exit 1
95
- awk -F"\t" '{print"__label__"$2" "$3}' < "${DATADIR}"/langid/sentences.csv | shuf > "$data_result"
96
- fi
97
-
98
- data_result="${DATADIR}/langid.train"
99
- if [ ! -f "$data_result" ]
100
- then
101
- tail -n +10001 "${DATADIR}"/langid/all.txt > "$data_result"
102
- fi
103
-
104
- data_result="${DATADIR}/langid.valid"
105
- if [ ! -f "$data_result" ]
106
- then
107
- head -n 10000 "${DATADIR}"/langid/all.txt > "$data_result"
108
- fi
109
-
110
- echo "Downloading cooking dataset"
111
-
112
- data_result="${DATADIR}"/cooking/cooking.stackexchange.txt
113
- if [ ! -f "$data_result" ]
114
- then
115
- mkdir -p "${DATADIR}"/cooking/
116
- wget https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz -O "${DATADIR}"/cooking/cooking.stackexchange.tar.gz
117
- tar xvzf "${DATADIR}"/cooking/cooking.stackexchange.tar.gz --directory "${DATADIR}"/cooking || exit 1
118
- cat "${DATADIR}"/cooking/cooking.stackexchange.txt | sed -e "s/\([.\!?,'/()]\)/ \1 /g" | tr "[:upper:]" "[:lower:]" > "${DATADIR}"/cooking/cooking.preprocessed.txt
119
- fi
120
-
121
- data_result="${DATADIR}"/cooking.train
122
- if [ ! -f "$data_result" ]
123
- then
124
- head -n 12404 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.train
125
- fi
126
-
127
- data_result="${DATADIR}"/cooking.valid
128
- if [ ! -f "$data_result" ]
129
- then
130
- tail -n 3000 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.valid
131
- fi
132
-
133
- echo "Checking for YFCC100M"
134
-
135
- data_result="${DATADIR}"/YFCC100M/train
136
- if [ ! -f "$data_result" ]
137
- then
138
- echo 'Download YFCC100M, unpack it and place train into the following path: '"$data_result"
139
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
140
- echo 'After you download this, run the script again'
141
- exit 1
142
- fi
143
-
144
- data_result="${DATADIR}"/YFCC100M/test
145
- if [ ! -f "$data_result" ]
146
- then
147
- echo 'Download YFCC100M, unpack it and place test into the following path: '"$data_result"
148
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
149
- echo 'After you download this, run the script again'
150
- exit 1
151
- fi
152
-
153
- DATASET=(
154
- ag_news
155
- sogou_news
156
- dbpedia
157
- yelp_review_polarity
158
- yelp_review_full
159
- yahoo_answers
160
- amazon_review_full
161
- amazon_review_polarity
162
- )
163
-
164
- ID=(
165
- 0Bz8a_Dbh9QhbUDNpeUdjb0wxRms # ag_news
166
- 0Bz8a_Dbh9QhbUkVqNEszd0pHaFE # sogou_news
167
- 0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k # dbpedia
168
- 0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg # yelp_review_polarity
169
- 0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0 # yelp_review_full
170
- 0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU # yahoo_answers
171
- 0Bz8a_Dbh9QhbZVhsUnRWRDhETzA # amazon_review_full
172
- 0Bz8a_Dbh9QhbaW12WVVZS2drcnM # amazon_review_polarity
173
- )
174
-
175
- # Small datasets first
176
-
177
- for i in {0..0}
178
- do
179
- echo "Downloading dataset ${DATASET[i]}"
180
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
181
- then
182
- wget -c "https://drive.google.com/uc?export=download&id=${ID[i]}" -O "${DATADIR}/${DATASET[i]}_csv.tar.gz"
183
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
184
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
185
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
186
- fi
187
- done
188
-
189
- # Large datasets require a bit more work due to the extra request page
190
-
191
- for i in {1..7}
192
- do
193
- echo "Downloading dataset ${DATASET[i]}"
194
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
195
- then
196
- curl -c /tmp/cookies "https://drive.google.com/uc?export=download&id=${ID[i]}" > /tmp/intermezzo.html
197
- curl -L -b /tmp/cookies "https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link" [^>]* href="\K[^"]*' | sed 's/\&amp;/\&/g')" > "${DATADIR}/${DATASET[i]}_csv.tar.gz"
198
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
199
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
200
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
201
- fi
202
- done