fasttext 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (498)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,38 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for SVO
10
- DIR=data/SVO-tensor-dataset
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
- pushd $FASTTEXTDIR
15
- make opt
16
- popd
17
- ft=${FASTTEXTDIR}/fasttext
18
-
19
- ## Train model and test it on validation:
20
-
21
- dim=200
22
- epoch=3
23
- model=svo
24
-
25
- echo "---- train ----"
26
- time $ft supervised -input ${DIR}/ft_svo_data_train_1000000.dat \
27
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
28
-
29
- echo "computing raw hit@5%..."
30
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
31
-
32
-
33
- echo "---- train + valid ----"
34
- time $ft supervised -input ${DIR}/ft_svo_data-valid+train.dat \
35
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20
36
-
37
- echo "computing raw hit@5%..."
38
- $ft test ${model}.bin ${DIR}/ft_svo_data_test_250000.dat 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for WN11
10
- DIR=data/wordnet-mlj12/
11
- FASTTEXTDIR=../../
12
-
13
- # compile
14
-
15
- pushd $FASTTEXTDIR
16
- make opt
17
- popd
18
- ft=${FASTTEXTDIR}/fasttext
19
-
20
- g++ -std=c++0x eval.cpp -o eval
21
-
22
- # Train model and test it:
23
- dim=100
24
- epoch=100
25
- neg=500
26
- model=data/wn
27
- pred=data/wnpred
28
-
29
- echo "---- train ----"
30
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-train.txt \
31
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
32
-
33
- echo "computing raw hits@10..."
34
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
35
-
36
- echo "computing filtered hit@10..."
37
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
38
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
39
-
40
- echo "---- train+val ----"
41
- $ft supervised -input ${DIR}/ft_wordnet-mlj12-valid+train.txt \
42
- -dim $dim -epoch $epoch -output ${model} -lr .2 -thread 20 -loss ns -neg $neg
43
-
44
- echo "computing raw hits@10..."
45
- $ft test ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'
46
-
47
- echo "computing filtered hit@10..."
48
- $ft predict ${model}.bin ${DIR}/ft_wordnet-mlj12-test.txt 20000 > $pred
49
- ./eval $pred ${DIR}/ft_wordnet-mlj12-test.txt $DIR/ft_wordnet-mlj12-full.txt 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
@@ -1,43 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- # This script applies quantization to the models from Table 1 in:
11
- # Bag of Tricks for Efficient Text Classification, arXiv 1607.01759, 2016
12
-
13
- set -e
14
-
15
- DATASET=(
16
- ag_news
17
- sogou_news
18
- dbpedia
19
- yelp_review_polarity
20
- yelp_review_full
21
- yahoo_answers
22
- amazon_review_full
23
- amazon_review_polarity
24
- )
25
-
26
- # These learning rates were chosen by validation on a subset of the training set.
27
- LR=( 0.25 0.5 0.5 0.1 0.1 0.1 0.05 0.05 )
28
-
29
- RESULTDIR=result
30
- DATADIR=data
31
-
32
- echo 'Warning! Make sure you run the classification-results.sh script before this one'
33
- echo 'Otherwise you can expect the commands in this script to fail'
34
-
35
- for i in {0..7}
36
- do
37
- echo "Working on dataset ${DATASET[i]}"
38
- ../../fasttext quantize -input "${DATADIR}/${DATASET[i]}.train" \
39
- -output "${RESULTDIR}/${DATASET[i]}" -lr "${LR[i]}" \
40
- -thread 4 -qnorm -retrain -epoch 5 -cutoff 100000 > /dev/null
41
- ../../fasttext test "${RESULTDIR}/${DATASET[i]}.ftz" \
42
- "${DATADIR}/${DATASET[i]}.test"
43
- done
@@ -1,2 +0,0 @@
1
- [metadata]
2
- description-file = README.md
@@ -1,203 +0,0 @@
1
- #!/usr/bin/env python
2
-
3
- # Copyright (c) 2017-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- from __future__ import absolute_import
11
- from __future__ import division
12
- from __future__ import print_function
13
- from __future__ import unicode_literals
14
-
15
- from setuptools import setup, Extension
16
- from setuptools.command.build_ext import build_ext
17
- import sys
18
- import setuptools
19
- import os
20
- import subprocess
21
- import platform
22
- import io
23
-
24
- __version__ = '0.9.1'
25
- FASTTEXT_SRC = "src"
26
-
27
- # Based on https://github.com/pybind/python_example
28
-
29
- class get_pybind_include(object):
30
- """Helper class to determine the pybind11 include path
31
-
32
- The purpose of this class is to postpone importing pybind11
33
- until it is actually installed, so that the ``get_include()``
34
- method can be invoked. """
35
-
36
- def __init__(self, user=False):
37
- try:
38
- import pybind11
39
- except ImportError:
40
- if subprocess.call([sys.executable, '-m', 'pip', 'install', 'pybind11']):
41
- raise RuntimeError('pybind11 install failed.')
42
-
43
- self.user = user
44
-
45
- def __str__(self):
46
- import pybind11
47
- return pybind11.get_include(self.user)
48
-
49
- try:
50
- coverage_index = sys.argv.index('--coverage')
51
- except ValueError:
52
- coverage = False
53
- else:
54
- del sys.argv[coverage_index]
55
- coverage = True
56
-
57
- fasttext_src_files = map(str, os.listdir(FASTTEXT_SRC))
58
- fasttext_src_cc = list(filter(lambda x: x.endswith('.cc'), fasttext_src_files))
59
-
60
- fasttext_src_cc = list(
61
- map(lambda x: str(os.path.join(FASTTEXT_SRC, x)), fasttext_src_cc)
62
- )
63
-
64
- ext_modules = [
65
- Extension(
66
- str('fasttext_pybind'),
67
- [
68
- str('python/fasttext_module/fasttext/pybind/fasttext_pybind.cc'),
69
- ] + fasttext_src_cc,
70
- include_dirs=[
71
- # Path to pybind11 headers
72
- get_pybind_include(),
73
- get_pybind_include(user=True),
74
- # Path to fasttext source code
75
- FASTTEXT_SRC,
76
- ],
77
- language='c++',
78
- extra_compile_args=["-O0 -fno-inline -fprofile-arcs -pthread -march=native" if coverage else
79
- "-O3 -funroll-loops -pthread -march=native"],
80
- ),
81
- ]
82
-
83
-
84
- # As of Python 3.6, CCompiler has a `has_flag` method.
85
- # cf http://bugs.python.org/issue26689
86
- def has_flag(compiler, flags):
87
- """Return a boolean indicating whether a flag name is supported on
88
- the specified compiler.
89
- """
90
- import tempfile
91
- with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
92
- f.write('int main (int argc, char **argv) { return 0; }')
93
- try:
94
- compiler.compile([f.name], extra_postargs=flags)
95
- except setuptools.distutils.errors.CompileError:
96
- return False
97
- return True
98
-
99
-
100
- def cpp_flag(compiler):
101
- """Return the -std=c++[0x/11/14] compiler flag.
102
- The c++14 is preferred over c++0x/11 (when it is available).
103
- """
104
- standards = ['-std=c++14', '-std=c++11', '-std=c++0x']
105
- for standard in standards:
106
- if has_flag(compiler, [standard]):
107
- return standard
108
- raise RuntimeError(
109
- 'Unsupported compiler -- at least C++0x support '
110
- 'is needed!'
111
- )
112
-
113
-
114
- class BuildExt(build_ext):
115
- """A custom build extension for adding compiler-specific options."""
116
- c_opts = {
117
- 'msvc': ['/EHsc'],
118
- 'unix': [],
119
- }
120
-
121
- def build_extensions(self):
122
- if sys.platform == 'darwin':
123
- mac_osx_version = float('.'.join(platform.mac_ver()[0].split('.')[:2]))
124
- os.environ['MACOSX_DEPLOYMENT_TARGET'] = str(mac_osx_version)
125
- all_flags = ['-stdlib=libc++', '-mmacosx-version-min=10.7']
126
- if has_flag(self.compiler, [all_flags[0]]):
127
- self.c_opts['unix'] += [all_flags[0]]
128
- elif has_flag(self.compiler, all_flags):
129
- self.c_opts['unix'] += all_flags
130
- else:
131
- raise RuntimeError(
132
- 'libc++ is needed! Failed to compile with {} and {}.'.
133
- format(" ".join(all_flags), all_flags[0])
134
- )
135
- ct = self.compiler.compiler_type
136
- opts = self.c_opts.get(ct, [])
137
- extra_link_args = []
138
-
139
- if coverage:
140
- coverage_option = '--coverage'
141
- opts.append(coverage_option)
142
- extra_link_args.append(coverage_option)
143
-
144
- if ct == 'unix':
145
- opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
146
- opts.append(cpp_flag(self.compiler))
147
- if has_flag(self.compiler, ['-fvisibility=hidden']):
148
- opts.append('-fvisibility=hidden')
149
- elif ct == 'msvc':
150
- opts.append(
151
- '/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
152
- )
153
- for ext in self.extensions:
154
- ext.extra_compile_args = opts
155
- ext.extra_link_args = extra_link_args
156
- build_ext.build_extensions(self)
157
-
158
-
159
- def _get_readme():
160
- """
161
- Use pandoc to generate rst from md.
162
- pandoc --from=markdown --to=rst --output=python/README.rst python/README.md
163
- """
164
- with io.open("python/README.rst", encoding='utf-8') as fid:
165
- return fid.read()
166
-
167
-
168
- setup(
169
- name='fasttext',
170
- version=__version__,
171
- author='Onur Celebi',
172
- author_email='celebio@fb.com',
173
- description='fasttext Python bindings',
174
- long_description=_get_readme(),
175
- ext_modules=ext_modules,
176
- url='https://github.com/facebookresearch/fastText',
177
- license='MIT',
178
- classifiers=[
179
- 'Development Status :: 3 - Alpha',
180
- 'Intended Audience :: Developers',
181
- 'Intended Audience :: Science/Research',
182
- 'License :: OSI Approved :: MIT License',
183
- 'Programming Language :: Python :: 2.7',
184
- 'Programming Language :: Python :: 3.4',
185
- 'Programming Language :: Python :: 3.5',
186
- 'Programming Language :: Python :: 3.6',
187
- 'Topic :: Software Development',
188
- 'Topic :: Scientific/Engineering',
189
- 'Operating System :: Microsoft :: Windows',
190
- 'Operating System :: POSIX',
191
- 'Operating System :: Unix',
192
- 'Operating System :: MacOS',
193
- ],
194
- install_requires=['pybind11>=2.2', "setuptools >= 0.7.0", "numpy"],
195
- cmdclass={'build_ext': BuildExt},
196
- packages=[
197
- str('fasttext'),
198
- str('fasttext.util'),
199
- str('fasttext.tests'),
200
- ],
201
- package_dir={str(''): str('python/fasttext_module')},
202
- zip_safe=False,
203
- )
@@ -1,202 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2016-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
-
10
- DATADIR=${DATADIR:-data}
11
-
12
- report_error() {
13
- echo "Error on line $1 of $0"
14
- }
15
-
16
- myshuf() {
17
- perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
18
- }
19
-
20
- normalize_text() {
21
- tr '[:upper:]' '[:lower:]' | sed -e 's/^/__label__/g' | \
22
- sed -e "s/'/ ' /g" -e 's/"//g' -e 's/\./ \. /g' -e 's/<br \/>/ /g' \
23
- -e 's/,/ , /g' -e 's/(/ ( /g' -e 's/)/ ) /g' -e 's/\!/ \! /g' \
24
- -e 's/\?/ \? /g' -e 's/\;/ /g' -e 's/\:/ /g' | tr -s " " | myshuf
25
- }
26
-
27
- set -e
28
- trap 'report_error $LINENO' ERR
29
-
30
- mkdir -p "${DATADIR}"
31
-
32
-
33
- # Unsupervised datasets
34
-
35
- data_result="${DATADIR}/rw_queries.txt"
36
- if [ ! -f "$data_result" ]
37
- then
38
- cut -f 1,2 "${DATADIR}"/rw/rw.txt | awk '{print tolower($0)}' | tr '\t' '\n' > "$data_result" || rm -f "$data_result"
39
- fi
40
-
41
- data_result="${DATADIR}/enwik9.zip"
42
- if [ ! -f "$data_result" ] || \
43
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "3e773f8a1577fda2e27f871ca17f31fd" ]
44
- then
45
- wget -c http://mattmahoney.net/dc/enwik9.zip -P "${DATADIR}" || rm -f "$data_result"
46
- unzip "$data_result" -d "${DATADIR}" || rm -f "$data_result"
47
- fi
48
-
49
- data_result="${DATADIR}/fil9"
50
- if [ ! -f "$data_result" ]
51
- then
52
- perl wikifil.pl "${DATADIR}/enwik9" > "$data_result" || rm -f "$data_result"
53
- fi
54
-
55
- data_result="${DATADIR}/rw/rw.txt"
56
- if [ ! -f "$data_result" ]
57
- then
58
- wget -c https://nlp.stanford.edu/~lmthang/morphoNLM/rw.zip -P "${DATADIR}"
59
- unzip "${DATADIR}/rw.zip" -d "${DATADIR}" || rm -f "$data_result"
60
- fi
61
-
62
- # Supervised datasets
63
- # Each datasets comes with a .train and a .test to measure performance
64
-
65
- echo "Downloading dataset dbpedia"
66
-
67
- data_result="${DATADIR}/dbpedia_csv.tar.gz"
68
- if [ ! -f "$data_result" ] || \
69
- [ $(md5sum "$data_result" | cut -f 1 -d ' ') != "8139d58cf075c7f70d085358e73af9b3" ]
70
- then
71
- wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "$data_result"
72
- tar -xzvf "$data_result" -C "${DATADIR}"
73
- fi
74
-
75
- data_result="${DATADIR}/dbpedia.train"
76
- if [ ! -f "$data_result" ]
77
- then
78
- cat "${DATADIR}/dbpedia_csv/train.csv" | normalize_text > "$data_result" || rm -f "$data_result"
79
- fi
80
-
81
- data_result="${DATADIR}/dbpedia.test"
82
- if [ ! -f "$data_result" ]
83
- then
84
- cat "${DATADIR}/dbpedia_csv/test.csv" | normalize_text > "$data_result" || rm -f "$data_result"
85
- fi
86
-
87
- echo "Downloading dataset tatoeba for langid"
88
-
89
- data_result="${DATADIR}"/langid/all.txt
90
- if [ ! -f "$data_result" ]
91
- then
92
- mkdir -p "${DATADIR}"/langid
93
- wget http://downloads.tatoeba.org/exports/sentences.tar.bz2 -O "${DATADIR}"/langid/sentences.tar.bz2
94
- tar xvfj "${DATADIR}"/langid/sentences.tar.bz2 --directory "${DATADIR}"/langid || exit 1
95
- awk -F"\t" '{print"__label__"$2" "$3}' < "${DATADIR}"/langid/sentences.csv | shuf > "$data_result"
96
- fi
97
-
98
- data_result="${DATADIR}/langid.train"
99
- if [ ! -f "$data_result" ]
100
- then
101
- tail -n +10001 "${DATADIR}"/langid/all.txt > "$data_result"
102
- fi
103
-
104
- data_result="${DATADIR}/langid.valid"
105
- if [ ! -f "$data_result" ]
106
- then
107
- head -n 10000 "${DATADIR}"/langid/all.txt > "$data_result"
108
- fi
109
-
110
- echo "Downloading cooking dataset"
111
-
112
- data_result="${DATADIR}"/cooking/cooking.stackexchange.txt
113
- if [ ! -f "$data_result" ]
114
- then
115
- mkdir -p "${DATADIR}"/cooking/
116
- wget https://dl.fbaipublicfiles.com/fasttext/data/cooking.stackexchange.tar.gz -O "${DATADIR}"/cooking/cooking.stackexchange.tar.gz
117
- tar xvzf "${DATADIR}"/cooking/cooking.stackexchange.tar.gz --directory "${DATADIR}"/cooking || exit 1
118
- cat "${DATADIR}"/cooking/cooking.stackexchange.txt | sed -e "s/\([.\!?,'/()]\)/ \1 /g" | tr "[:upper:]" "[:lower:]" > "${DATADIR}"/cooking/cooking.preprocessed.txt
119
- fi
120
-
121
- data_result="${DATADIR}"/cooking.train
122
- if [ ! -f "$data_result" ]
123
- then
124
- head -n 12404 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.train
125
- fi
126
-
127
- data_result="${DATADIR}"/cooking.valid
128
- if [ ! -f "$data_result" ]
129
- then
130
- tail -n 3000 "${DATADIR}"/cooking/cooking.preprocessed.txt > "${DATADIR}"/cooking.valid
131
- fi
132
-
133
- echo "Checking for YFCC100M"
134
-
135
- data_result="${DATADIR}"/YFCC100M/train
136
- if [ ! -f "$data_result" ]
137
- then
138
- echo 'Download YFCC100M, unpack it and place train into the following path: '"$data_result"
139
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
140
- echo 'After you download this, run the script again'
141
- exit 1
142
- fi
143
-
144
- data_result="${DATADIR}"/YFCC100M/test
145
- if [ ! -f "$data_result" ]
146
- then
147
- echo 'Download YFCC100M, unpack it and place test into the following path: '"$data_result"
148
- echo 'You can download YFCC100M at :'"https://fasttext.cc/docs/en/dataset.html"
149
- echo 'After you download this, run the script again'
150
- exit 1
151
- fi
152
-
153
- DATASET=(
154
- ag_news
155
- sogou_news
156
- dbpedia
157
- yelp_review_polarity
158
- yelp_review_full
159
- yahoo_answers
160
- amazon_review_full
161
- amazon_review_polarity
162
- )
163
-
164
- ID=(
165
- 0Bz8a_Dbh9QhbUDNpeUdjb0wxRms # ag_news
166
- 0Bz8a_Dbh9QhbUkVqNEszd0pHaFE # sogou_news
167
- 0Bz8a_Dbh9QhbQ2Vic1kxMmZZQ1k # dbpedia
168
- 0Bz8a_Dbh9QhbNUpYQ2N3SGlFaDg # yelp_review_polarity
169
- 0Bz8a_Dbh9QhbZlU4dXhHTFhZQU0 # yelp_review_full
170
- 0Bz8a_Dbh9Qhbd2JNdDBsQUdocVU # yahoo_answers
171
- 0Bz8a_Dbh9QhbZVhsUnRWRDhETzA # amazon_review_full
172
- 0Bz8a_Dbh9QhbaW12WVVZS2drcnM # amazon_review_polarity
173
- )
174
-
175
- # Small datasets first
176
-
177
- for i in {0..0}
178
- do
179
- echo "Downloading dataset ${DATASET[i]}"
180
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
181
- then
182
- wget -c "https://drive.google.com/uc?export=download&id=${ID[i]}" -O "${DATADIR}/${DATASET[i]}_csv.tar.gz"
183
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
184
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
185
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
186
- fi
187
- done
188
-
189
- # Large datasets require a bit more work due to the extra request page
190
-
191
- for i in {1..7}
192
- do
193
- echo "Downloading dataset ${DATASET[i]}"
194
- if [ ! -f "${DATADIR}/${DATASET[i]}.train" ]
195
- then
196
- curl -c /tmp/cookies "https://drive.google.com/uc?export=download&id=${ID[i]}" > /tmp/intermezzo.html
197
- curl -L -b /tmp/cookies "https://drive.google.com$(cat /tmp/intermezzo.html | grep -Po 'uc-download-link" [^>]* href="\K[^"]*' | sed 's/\&amp;/\&/g')" > "${DATADIR}/${DATASET[i]}_csv.tar.gz"
198
- tar -xzvf "${DATADIR}/${DATASET[i]}_csv.tar.gz" -C "${DATADIR}"
199
- cat "${DATADIR}/${DATASET[i]}_csv/train.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.train"
200
- cat "${DATADIR}/${DATASET[i]}_csv/test.csv" | normalize_text > "${DATADIR}/${DATASET[i]}.test"
201
- fi
202
- done