fasttext 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +251 -0
  5. data/ext/fasttext/ext.cpp +291 -0
  6. data/ext/fasttext/extconf.rb +15 -0
  7. data/lib/fasttext.rb +41 -0
  8. data/lib/fasttext/classifier.rb +92 -0
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/lib/fasttext/model.rb +60 -0
  11. data/lib/fasttext/vectorizer.rb +58 -0
  12. data/lib/fasttext/version.rb +3 -0
  13. data/vendor/fastText/CMakeLists.txt +68 -0
  14. data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
  15. data/vendor/fastText/CONTRIBUTING.md +32 -0
  16. data/vendor/fastText/LICENSE +21 -0
  17. data/vendor/fastText/MANIFEST.in +5 -0
  18. data/vendor/fastText/Makefile +63 -0
  19. data/vendor/fastText/README.md +339 -0
  20. data/vendor/fastText/alignment/README.md +53 -0
  21. data/vendor/fastText/alignment/align.py +145 -0
  22. data/vendor/fastText/alignment/eval.py +60 -0
  23. data/vendor/fastText/alignment/example.sh +51 -0
  24. data/vendor/fastText/alignment/unsup_align.py +109 -0
  25. data/vendor/fastText/alignment/utils.py +154 -0
  26. data/vendor/fastText/classification-example.sh +41 -0
  27. data/vendor/fastText/classification-results.sh +94 -0
  28. data/vendor/fastText/crawl/README.md +26 -0
  29. data/vendor/fastText/crawl/dedup.cc +51 -0
  30. data/vendor/fastText/crawl/download_crawl.sh +57 -0
  31. data/vendor/fastText/crawl/filter_dedup.sh +13 -0
  32. data/vendor/fastText/crawl/filter_utf8.cc +105 -0
  33. data/vendor/fastText/crawl/process_wet_file.sh +30 -0
  34. data/vendor/fastText/docs/aligned-vectors.md +64 -0
  35. data/vendor/fastText/docs/api.md +6 -0
  36. data/vendor/fastText/docs/cheatsheet.md +66 -0
  37. data/vendor/fastText/docs/crawl-vectors.md +125 -0
  38. data/vendor/fastText/docs/dataset.md +6 -0
  39. data/vendor/fastText/docs/english-vectors.md +53 -0
  40. data/vendor/fastText/docs/faqs.md +63 -0
  41. data/vendor/fastText/docs/language-identification.md +47 -0
  42. data/vendor/fastText/docs/options.md +50 -0
  43. data/vendor/fastText/docs/pretrained-vectors.md +142 -0
  44. data/vendor/fastText/docs/python-module.md +314 -0
  45. data/vendor/fastText/docs/references.md +41 -0
  46. data/vendor/fastText/docs/supervised-models.md +54 -0
  47. data/vendor/fastText/docs/supervised-tutorial.md +349 -0
  48. data/vendor/fastText/docs/support.md +58 -0
  49. data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
  50. data/vendor/fastText/eval.py +95 -0
  51. data/vendor/fastText/get-wikimedia.sh +79 -0
  52. data/vendor/fastText/python/README.md +322 -0
  53. data/vendor/fastText/python/README.rst +406 -0
  54. data/vendor/fastText/python/benchmarks/README.rst +3 -0
  55. data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
  56. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
  57. data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
  58. data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
  59. data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
  60. data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
  61. data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
  62. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
  63. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
  64. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
  65. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
  66. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
  67. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
  68. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
  69. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
  70. data/vendor/fastText/quantization-example.sh +40 -0
  71. data/vendor/fastText/runtests.py +60 -0
  72. data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
  73. data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
  74. data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
  75. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
  76. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
  77. data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
  78. data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
  79. data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
  80. data/vendor/fastText/setup.cfg +2 -0
  81. data/vendor/fastText/setup.py +203 -0
  82. data/vendor/fastText/src/args.cc +320 -0
  83. data/vendor/fastText/src/args.h +68 -0
  84. data/vendor/fastText/src/densematrix.cc +155 -0
  85. data/vendor/fastText/src/densematrix.h +75 -0
  86. data/vendor/fastText/src/dictionary.cc +540 -0
  87. data/vendor/fastText/src/dictionary.h +111 -0
  88. data/vendor/fastText/src/fasttext.cc +821 -0
  89. data/vendor/fastText/src/fasttext.h +191 -0
  90. data/vendor/fastText/src/loss.cc +346 -0
  91. data/vendor/fastText/src/loss.h +163 -0
  92. data/vendor/fastText/src/main.cc +435 -0
  93. data/vendor/fastText/src/matrix.cc +25 -0
  94. data/vendor/fastText/src/matrix.h +44 -0
  95. data/vendor/fastText/src/meter.cc +68 -0
  96. data/vendor/fastText/src/meter.h +69 -0
  97. data/vendor/fastText/src/model.cc +98 -0
  98. data/vendor/fastText/src/model.h +79 -0
  99. data/vendor/fastText/src/productquantizer.cc +251 -0
  100. data/vendor/fastText/src/productquantizer.h +63 -0
  101. data/vendor/fastText/src/quantmatrix.cc +117 -0
  102. data/vendor/fastText/src/quantmatrix.h +60 -0
  103. data/vendor/fastText/src/real.h +15 -0
  104. data/vendor/fastText/src/utils.cc +28 -0
  105. data/vendor/fastText/src/utils.h +43 -0
  106. data/vendor/fastText/src/vector.cc +97 -0
  107. data/vendor/fastText/src/vector.h +61 -0
  108. data/vendor/fastText/tests/fetch_test_data.sh +202 -0
  109. data/vendor/fastText/website/README.md +6 -0
  110. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
  111. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
  112. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
  113. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
  114. data/vendor/fastText/website/core/Footer.js +127 -0
  115. data/vendor/fastText/website/package.json +12 -0
  116. data/vendor/fastText/website/pages/en/index.js +286 -0
  117. data/vendor/fastText/website/sidebars.json +18 -0
  118. data/vendor/fastText/website/siteConfig.js +102 -0
  119. data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
  120. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
  121. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
  122. data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
  123. data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
  124. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
  125. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  126. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  127. data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
  141. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
  142. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
  143. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
  144. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
  145. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
  146. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
  147. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
  148. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
  149. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
  150. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
  151. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
  152. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  153. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
  154. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
  155. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
  156. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
  157. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
  158. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
  159. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
  161. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  162. data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
  163. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
  164. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
  165. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
  166. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
  167. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  168. data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
  169. data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
  170. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  171. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  172. data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
  173. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
  174. data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
  175. data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
  176. data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
  177. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
  178. data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
  179. data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
  180. data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
  181. data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
  182. data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
  183. data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
  184. data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
  185. data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
  186. data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
  187. data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
  188. data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
  189. data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
  190. data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
  191. data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
  192. data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
  193. data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
  194. data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
  195. data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
  196. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
  197. data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
  198. data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
  199. data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
  200. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
  201. data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
  202. data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
  203. data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
  204. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
  205. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
  206. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
  207. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
  208. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
  209. data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
  210. data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
  211. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
  212. data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
  213. data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
  214. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
  215. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
  216. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
  217. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
  218. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
  219. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
  220. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
  221. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
  222. data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
  223. data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
  224. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  225. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  226. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  227. data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
  228. data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
  229. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
  230. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
  231. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
  232. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  233. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
  234. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
  235. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
  236. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
  237. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
  238. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
  239. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
  240. data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
  241. data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
  242. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
  243. data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
  244. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
  245. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
  246. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
  247. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
  248. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
  249. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
  250. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
  251. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
  252. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
  253. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
  254. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
  255. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
  256. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
  257. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
  258. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
  259. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
  260. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
  261. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
  262. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
  263. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
  264. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
  265. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
  266. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
  267. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
  268. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
  269. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
  270. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
  271. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
  272. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
  273. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
  274. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
  275. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
  276. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
  277. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
  278. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
  279. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
  281. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
  282. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
  283. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
  284. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
  285. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
  286. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
  287. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
  288. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
  289. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
  290. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
  291. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
  299. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
  301. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
  302. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
  303. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
  304. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
  305. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
  306. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
  307. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
  308. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
  309. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
  310. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  311. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
  312. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
  313. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
  314. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
  315. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
  316. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
  317. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
  318. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
  319. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
  320. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
  321. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
  322. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
  323. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
  324. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
  326. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
  327. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
  328. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
  329. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
  330. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
  331. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
  332. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
  333. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
  334. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
  335. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
  336. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
  337. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
  338. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
  339. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
  340. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
  341. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
  342. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
  343. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
  344. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
  345. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
  346. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
  347. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
  348. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
  349. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
  350. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
  351. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
  352. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
  353. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
  354. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
  392. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
  394. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
  395. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
  396. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
  397. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
  398. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
  402. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
  403. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  404. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
  405. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
  406. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
  407. data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
  408. data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
  409. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  410. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  411. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  412. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
  413. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
  414. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
  415. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
  416. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
  446. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
  447. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
  448. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
  449. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
  450. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
  451. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
  452. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
  453. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
  454. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
  455. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
  456. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
  457. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
  459. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
  460. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
  461. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
  462. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
  463. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
  464. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  465. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  466. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  467. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  468. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  469. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  470. data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
  471. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
  472. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
  473. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
  474. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
  475. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
  476. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
  477. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
  478. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
  479. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
  480. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
  481. data/vendor/fastText/website/static/fasttext.css +48 -0
  482. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  483. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  484. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  485. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  486. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  487. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  488. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  489. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  490. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  491. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  492. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  493. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  494. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  495. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  496. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  497. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  498. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  499. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  500. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  501. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  502. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  503. data/vendor/fastText/website/static/img/model-black.png +0 -0
  504. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  505. data/vendor/fastText/website/static/img/model-red.png +0 -0
  506. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  507. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  508. data/vendor/fastText/wikifil.pl +57 -0
  509. data/vendor/fastText/word-vector-example.sh +39 -0
  510. metadata +621 -0
@@ -0,0 +1,38 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Copyright (c) 2017-present, Facebook, Inc.
4
+ # All rights reserved.
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+ # script for SVO
10
# Builds fastText, trains a supervised model on the SVO data twice (train
# split only, then train+valid) and prints the raw hit@5% line after each run.
DIR=data/SVO-tensor-dataset
FASTTEXTDIR=../../

# compile
# Stop right away if the source tree is missing or the build fails: every
# later step needs the binary produced here.
pushd "$FASTTEXTDIR" || exit 1
make opt || exit 1
popd || exit 1
ft="${FASTTEXTDIR}/fasttext"

## Train model and test it on validation:

dim=200
epoch=3
model=svo

echo "---- train ----"
time "$ft" supervised -input "${DIR}/ft_svo_data_train_1000000.dat" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20

echo "computing raw hit@5%..."
# awk keeps only the second field of the third line printed by `test`.
"$ft" test "${model}.bin" "${DIR}/ft_svo_data_test_250000.dat" 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'


echo "---- train + valid ----"
time "$ft" supervised -input "${DIR}/ft_svo_data-valid+train.dat" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20

echo "computing raw hit@5%..."
"$ft" test "${model}.bin" "${DIR}/ft_svo_data_test_250000.dat" 227 2> /dev/null | awk '{if(NR==3) print "raw hit@5%="$2}'
@@ -0,0 +1,49 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Copyright (c) 2017-present, Facebook, Inc.
4
+ # All rights reserved.
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+ # script for WN11
10
# Builds fastText and the local `eval` helper, trains a supervised model on
# the WordNet data twice (train split only, then train+valid) and prints raw
# and filtered hit@10 after each run.
DIR=data/wordnet-mlj12/
FASTTEXTDIR=../../

# compile
# Stop right away if the source tree is missing or a build fails: every later
# step needs the binaries produced here.
pushd "$FASTTEXTDIR" || exit 1
make opt || exit 1
popd || exit 1
ft="${FASTTEXTDIR}/fasttext"

g++ -std=c++0x eval.cpp -o eval || exit 1

# Train model and test it:
dim=100
epoch=100
neg=500
model=data/wn
pred=data/wnpred

echo "---- train ----"
"$ft" supervised -input "${DIR}/ft_wordnet-mlj12-train.txt" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20 -loss ns -neg "$neg"

echo "computing raw hits@10..."
# awk keeps only the second field of the third line printed by `test`.
"$ft" test "${model}.bin" "${DIR}/ft_wordnet-mlj12-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'

echo "computing filtered hit@10..."
# Predictions are written to $pred and then scored by ./eval against the
# test file and the full file.
"$ft" predict "${model}.bin" "${DIR}/ft_wordnet-mlj12-test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_wordnet-mlj12-test.txt" "$DIR/ft_wordnet-mlj12-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'

echo "---- train+val ----"
"$ft" supervised -input "${DIR}/ft_wordnet-mlj12-valid+train.txt" \
  -dim "$dim" -epoch "$epoch" -output "${model}" -lr .2 -thread 20 -loss ns -neg "$neg"

echo "computing raw hits@10..."
"$ft" test "${model}.bin" "${DIR}/ft_wordnet-mlj12-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10 = "$2}'

echo "computing filtered hit@10..."
"$ft" predict "${model}.bin" "${DIR}/ft_wordnet-mlj12-test.txt" 20000 > "$pred"
./eval "$pred" "${DIR}/ft_wordnet-mlj12-test.txt" "$DIR/ft_wordnet-mlj12-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10 = "$2}'
@@ -0,0 +1,43 @@
1
+ #!/usr/bin/env bash
2
+ #
3
+ # Copyright (c) 2016-present, Facebook, Inc.
4
+ # All rights reserved.
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ # This script applies quantization to the models from Table 1 in:
11
+ # Bag of Tricks for Efficient Text Classification, arXiv 1607.01759, 2016
12
+
13
# Abort on the first failing command.
set -e

DATASET=(
  ag_news
  sogou_news
  dbpedia
  yelp_review_polarity
  yelp_review_full
  yahoo_answers
  amazon_review_full
  amazon_review_polarity
)

# These learning rates were chosen by validation on a subset of the training set.
# LR[i] is the rate used for DATASET[i]; keep both arrays the same length.
LR=( 0.25 0.5 0.5 0.1 0.1 0.1 0.05 0.05 )

RESULTDIR=result
DATADIR=data

echo 'Warning! Make sure you run the classification-results.sh script before this one'
echo 'Otherwise you can expect the commands in this script to fail'

# Walk the indices of DATASET itself, so adding or removing a dataset does
# not require touching a hard-coded loop bound.
for i in "${!DATASET[@]}"
do
  echo "Working on dataset ${DATASET[i]}"
  # Quantize the model stored under RESULTDIR (output discarded), then
  # evaluate the resulting .ftz file on the test split.
  ../../fasttext quantize -input "${DATADIR}/${DATASET[i]}.train" \
    -output "${RESULTDIR}/${DATASET[i]}" -lr "${LR[i]}" \
    -thread 4 -qnorm -retrain -epoch 5 -cutoff 100000 > /dev/null
  ../../fasttext test "${RESULTDIR}/${DATASET[i]}.ftz" \
    "${DATADIR}/${DATASET[i]}.test"
done
@@ -0,0 +1,2 @@
1
+ [metadata]
2
+ description-file = README.md
@@ -0,0 +1,203 @@
1
+ #!/usr/bin/env python
2
+
3
+ # Copyright (c) 2017-present, Facebook, Inc.
4
+ # All rights reserved.
5
+ #
6
+ # This source code is licensed under the MIT license found in the
7
+ # LICENSE file in the root directory of this source tree.
8
+ #
9
+
10
+ from __future__ import absolute_import
11
+ from __future__ import division
12
+ from __future__ import print_function
13
+ from __future__ import unicode_literals
14
+
15
+ from setuptools import setup, Extension
16
+ from setuptools.command.build_ext import build_ext
17
+ import sys
18
+ import setuptools
19
+ import os
20
+ import subprocess
21
+ import platform
22
+ import io
23
+
24
+ __version__ = '0.9.1'
25
+ FASTTEXT_SRC = "src"
26
+
27
+ # Based on https://github.com/pybind/python_example
28
+
29
class get_pybind_include(object):
    """Lazy stand-in for the pybind11 include directory.

    Instances can be placed in ``include_dirs`` before pybind11 is
    importable; the real path is only looked up when the object is
    converted with ``str()``.
    """

    def __init__(self, user=False):
        # ``user`` is forwarded unchanged to ``pybind11.get_include``.
        self.user = user
        try:
            import pybind11  # noqa: F401 -- availability probe only
        except ImportError:
            # Not importable: try to install it with the running interpreter.
            pip_command = [sys.executable, '-m', 'pip', 'install', 'pybind11']
            exit_status = subprocess.call(pip_command)
            if exit_status:
                raise RuntimeError('pybind11 install failed.')

    def __str__(self):
        # Imported here, not at module level, so that an install performed
        # by __init__ is picked up.
        import pybind11
        include_dir = pybind11.get_include(self.user)
        return include_dir
48
+
49
# ``--coverage`` is a private switch of this script: remember whether it was
# given and drop its first occurrence from ``sys.argv``.
coverage = '--coverage' in sys.argv
if coverage:
    sys.argv.remove('--coverage')

# Names of all entries directly inside FASTTEXT_SRC ...
fasttext_src_files = [str(entry) for entry in os.listdir(FASTTEXT_SRC)]
# ... of which only the ``.cc`` files are kept, prefixed with the directory.
fasttext_src_cc = [
    str(os.path.join(FASTTEXT_SRC, file_name))
    for file_name in fasttext_src_files
    if file_name.endswith('.cc')
]

_pybind_sources = [
    str('python/fasttext_module/fasttext/pybind/fasttext_pybind.cc'),
]

ext_modules = [
    Extension(
        str('fasttext_pybind'),
        _pybind_sources + fasttext_src_cc,
        include_dirs=[
            # Path to pybind11 headers
            get_pybind_include(),
            get_pybind_include(user=True),
            # Path to fasttext source code
            FASTTEXT_SRC,
        ],
        language='c++',
        extra_compile_args=[
            "-O0 -fno-inline -fprofile-arcs -pthread -march=native"
            if coverage
            else "-O3 -funroll-loops -pthread -march=native"
        ],
    ),
]
82
+
83
+
84
+ # As of Python 3.6, CCompiler has a `has_flag` method.
85
+ # cf http://bugs.python.org/issue26689
86
def has_flag(compiler, flags):
    """Return a boolean indicating whether a flag name is supported on
    the specified compiler.

    A one-line C++ program is written to a temporary ``.cpp`` file and
    compiled with ``flags`` passed as ``extra_postargs``.

    :param compiler: object exposing ``compile(sources, extra_postargs=...)``.
    :param flags: list of flags to try together.
    :return: ``False`` if the compile raises ``CompileError``, else ``True``.
    """
    import tempfile
    with tempfile.NamedTemporaryFile('w', suffix='.cpp') as f:
        f.write('int main (int argc, char **argv) { return 0; }')
        # The compiler opens the file by name, so the text must leave
        # Python's write buffer first; otherwise an empty file is compiled.
        f.flush()
        try:
            compiler.compile([f.name], extra_postargs=flags)
        except setuptools.distutils.errors.CompileError:
            return False
    return True
98
+
99
+
100
def cpp_flag(compiler):
    """Pick the newest ``-std=`` flag the compiler accepts.

    Candidates are probed with ``has_flag`` in the order c++14, c++11,
    c++0x and the first accepted one is returned. A ``RuntimeError`` is
    raised when none of them is accepted.
    """
    candidates = ('-std=c++14', '-std=c++11', '-std=c++0x')
    # The generator is lazy, so probing stops at the first accepted flag.
    accepted = next(
        (flag for flag in candidates if has_flag(compiler, [flag])),
        None,
    )
    if accepted is None:
        raise RuntimeError(
            'Unsupported compiler -- at least C++0x support '
            'is needed!'
        )
    return accepted
112
+
113
+
114
class BuildExt(build_ext):
    """A custom build extension for adding compiler-specific options."""

    # Base options per compiler type. These lists are class attributes
    # shared by every instance, so build_extensions() copies them and never
    # modifies them in place.
    c_opts = {
        'msvc': ['/EHsc'],
        'unix': [],
    }

    def build_extensions(self):
        """Work out compile/link options, assign them to every extension
        and delegate to ``build_ext.build_extensions``.

        Raises ``RuntimeError`` on macOS when neither probed libc++ flag
        combination compiles.
        """
        darwin_opts = []
        if sys.platform == 'darwin':
            # Keep the version as text: going through float() would turn
            # "10.10" into 10.1 and export the wrong deployment target.
            mac_osx_version = '.'.join(platform.mac_ver()[0].split('.')[:2])
            os.environ['MACOSX_DEPLOYMENT_TARGET'] = mac_osx_version
            all_flags = ['-stdlib=libc++', '-mmacosx-version-min=10.7']
            if has_flag(self.compiler, [all_flags[0]]):
                darwin_opts = [all_flags[0]]
            elif has_flag(self.compiler, all_flags):
                darwin_opts = list(all_flags)
            else:
                raise RuntimeError(
                    'libc++ is needed! Failed to compile with {} and {}.'.
                    format(" ".join(all_flags), all_flags[0])
                )
        ct = self.compiler.compiler_type
        # Copy, so repeated builds do not accumulate flags in ``c_opts``.
        opts = list(self.c_opts.get(ct, []))
        if ct == 'unix':
            opts.extend(darwin_opts)
        extra_link_args = []

        if coverage:
            coverage_option = '--coverage'
            opts.append(coverage_option)
            extra_link_args.append(coverage_option)

        if ct == 'unix':
            opts.append('-DVERSION_INFO="%s"' % self.distribution.get_version())
            opts.append(cpp_flag(self.compiler))
            if has_flag(self.compiler, ['-fvisibility=hidden']):
                opts.append('-fvisibility=hidden')
        elif ct == 'msvc':
            opts.append(
                '/DVERSION_INFO=\\"%s\\"' % self.distribution.get_version()
            )
        # Note: this replaces any extra_compile_args / extra_link_args the
        # Extension objects were created with.
        for ext in self.extensions:
            ext.extra_compile_args = opts
            ext.extra_link_args = extra_link_args
        build_ext.build_extensions(self)
157
+
158
+
159
def _get_readme():
    """
    Return the text of python/README.rst, decoded as UTF-8.

    The rst file is expected to be generated from the markdown one with pandoc:
    pandoc --from=markdown --to=rst --output=python/README.rst python/README.md
    """
    readme = io.open("python/README.rst", encoding='utf-8')
    try:
        return readme.read()
    finally:
        readme.close()
166
+
167
+
168
# Trove classifiers passed to setup() below.
_classifiers = [
    'Development Status :: 3 - Alpha',
    'Intended Audience :: Developers',
    'Intended Audience :: Science/Research',
    'License :: OSI Approved :: MIT License',
    'Programming Language :: Python :: 2.7',
    'Programming Language :: Python :: 3.4',
    'Programming Language :: Python :: 3.5',
    'Programming Language :: Python :: 3.6',
    'Topic :: Software Development',
    'Topic :: Scientific/Engineering',
    'Operating System :: Microsoft :: Windows',
    'Operating System :: POSIX',
    'Operating System :: Unix',
    'Operating System :: MacOS',
]

# Python packages to install; they live under python/fasttext_module.
_packages = [
    str('fasttext'),
    str('fasttext.util'),
    str('fasttext.tests'),
]

setup(
    name='fasttext',
    version=__version__,
    author='Onur Celebi',
    author_email='celebio@fb.com',
    description='fasttext Python bindings',
    long_description=_get_readme(),
    url='https://github.com/facebookresearch/fastText',
    license='MIT',
    classifiers=_classifiers,
    install_requires=['pybind11>=2.2', "setuptools >= 0.7.0", "numpy"],
    # Native extension, built through the customised build_ext command.
    ext_modules=ext_modules,
    cmdclass={'build_ext': BuildExt},
    packages=_packages,
    package_dir={str(''): str('python/fasttext_module')},
    zip_safe=False,
)
@@ -0,0 +1,320 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #include "args.h"
10
+
11
+ #include <stdlib.h>
12
+
13
+ #include <iostream>
14
+ #include <stdexcept>
15
+
16
+ namespace fasttext {
17
+
18
+ Args::Args() {
19
+ lr = 0.05;
20
+ dim = 100;
21
+ ws = 5;
22
+ epoch = 5;
23
+ minCount = 5;
24
+ minCountLabel = 0;
25
+ neg = 5;
26
+ wordNgrams = 1;
27
+ loss = loss_name::ns;
28
+ model = model_name::sg;
29
+ bucket = 2000000;
30
+ minn = 3;
31
+ maxn = 6;
32
+ thread = 12;
33
+ lrUpdateRate = 100;
34
+ t = 1e-4;
35
+ label = "__label__";
36
+ verbose = 2;
37
+ pretrainedVectors = "";
38
+ saveOutput = false;
39
+
40
+ qout = false;
41
+ retrain = false;
42
+ qnorm = false;
43
+ cutoff = 0;
44
+ dsub = 2;
45
+ }
46
+
47
+ std::string Args::lossToString(loss_name ln) const {
48
+ switch (ln) {
49
+ case loss_name::hs:
50
+ return "hs";
51
+ case loss_name::ns:
52
+ return "ns";
53
+ case loss_name::softmax:
54
+ return "softmax";
55
+ case loss_name::ova:
56
+ return "one-vs-all";
57
+ }
58
+ return "Unknown loss!"; // should never happen
59
+ }
60
+
61
+ std::string Args::boolToString(bool b) const {
62
+ if (b) {
63
+ return "true";
64
+ } else {
65
+ return "false";
66
+ }
67
+ }
68
+
69
+ std::string Args::modelToString(model_name mn) const {
70
+ switch (mn) {
71
+ case model_name::cbow:
72
+ return "cbow";
73
+ case model_name::sg:
74
+ return "sg";
75
+ case model_name::sup:
76
+ return "sup";
77
+ }
78
+ return "Unknown model name!"; // should never happen
79
+ }
80
+
81
+ void Args::parseArgs(const std::vector<std::string>& args) {
82
+ std::string command(args[1]);
83
+ if (command == "supervised") {
84
+ model = model_name::sup;
85
+ loss = loss_name::softmax;
86
+ minCount = 1;
87
+ minn = 0;
88
+ maxn = 0;
89
+ lr = 0.1;
90
+ } else if (command == "cbow") {
91
+ model = model_name::cbow;
92
+ }
93
+ for (int ai = 2; ai < args.size(); ai += 2) {
94
+ if (args[ai][0] != '-') {
95
+ std::cerr << "Provided argument without a dash! Usage:" << std::endl;
96
+ printHelp();
97
+ exit(EXIT_FAILURE);
98
+ }
99
+ try {
100
+ if (args[ai] == "-h") {
101
+ std::cerr << "Here is the help! Usage:" << std::endl;
102
+ printHelp();
103
+ exit(EXIT_FAILURE);
104
+ } else if (args[ai] == "-input") {
105
+ input = std::string(args.at(ai + 1));
106
+ } else if (args[ai] == "-output") {
107
+ output = std::string(args.at(ai + 1));
108
+ } else if (args[ai] == "-lr") {
109
+ lr = std::stof(args.at(ai + 1));
110
+ } else if (args[ai] == "-lrUpdateRate") {
111
+ lrUpdateRate = std::stoi(args.at(ai + 1));
112
+ } else if (args[ai] == "-dim") {
113
+ dim = std::stoi(args.at(ai + 1));
114
+ } else if (args[ai] == "-ws") {
115
+ ws = std::stoi(args.at(ai + 1));
116
+ } else if (args[ai] == "-epoch") {
117
+ epoch = std::stoi(args.at(ai + 1));
118
+ } else if (args[ai] == "-minCount") {
119
+ minCount = std::stoi(args.at(ai + 1));
120
+ } else if (args[ai] == "-minCountLabel") {
121
+ minCountLabel = std::stoi(args.at(ai + 1));
122
+ } else if (args[ai] == "-neg") {
123
+ neg = std::stoi(args.at(ai + 1));
124
+ } else if (args[ai] == "-wordNgrams") {
125
+ wordNgrams = std::stoi(args.at(ai + 1));
126
+ } else if (args[ai] == "-loss") {
127
+ if (args.at(ai + 1) == "hs") {
128
+ loss = loss_name::hs;
129
+ } else if (args.at(ai + 1) == "ns") {
130
+ loss = loss_name::ns;
131
+ } else if (args.at(ai + 1) == "softmax") {
132
+ loss = loss_name::softmax;
133
+ } else if (
134
+ args.at(ai + 1) == "one-vs-all" || args.at(ai + 1) == "ova") {
135
+ loss = loss_name::ova;
136
+ } else {
137
+ std::cerr << "Unknown loss: " << args.at(ai + 1) << std::endl;
138
+ printHelp();
139
+ exit(EXIT_FAILURE);
140
+ }
141
+ } else if (args[ai] == "-bucket") {
142
+ bucket = std::stoi(args.at(ai + 1));
143
+ } else if (args[ai] == "-minn") {
144
+ minn = std::stoi(args.at(ai + 1));
145
+ } else if (args[ai] == "-maxn") {
146
+ maxn = std::stoi(args.at(ai + 1));
147
+ } else if (args[ai] == "-thread") {
148
+ thread = std::stoi(args.at(ai + 1));
149
+ } else if (args[ai] == "-t") {
150
+ t = std::stof(args.at(ai + 1));
151
+ } else if (args[ai] == "-label") {
152
+ label = std::string(args.at(ai + 1));
153
+ } else if (args[ai] == "-verbose") {
154
+ verbose = std::stoi(args.at(ai + 1));
155
+ } else if (args[ai] == "-pretrainedVectors") {
156
+ pretrainedVectors = std::string(args.at(ai + 1));
157
+ } else if (args[ai] == "-saveOutput") {
158
+ saveOutput = true;
159
+ ai--;
160
+ } else if (args[ai] == "-qnorm") {
161
+ qnorm = true;
162
+ ai--;
163
+ } else if (args[ai] == "-retrain") {
164
+ retrain = true;
165
+ ai--;
166
+ } else if (args[ai] == "-qout") {
167
+ qout = true;
168
+ ai--;
169
+ } else if (args[ai] == "-cutoff") {
170
+ cutoff = std::stoi(args.at(ai + 1));
171
+ } else if (args[ai] == "-dsub") {
172
+ dsub = std::stoi(args.at(ai + 1));
173
+ } else {
174
+ std::cerr << "Unknown argument: " << args[ai] << std::endl;
175
+ printHelp();
176
+ exit(EXIT_FAILURE);
177
+ }
178
+ } catch (std::out_of_range) {
179
+ std::cerr << args[ai] << " is missing an argument" << std::endl;
180
+ printHelp();
181
+ exit(EXIT_FAILURE);
182
+ }
183
+ }
184
+ if (input.empty() || output.empty()) {
185
+ std::cerr << "Empty input or output path." << std::endl;
186
+ printHelp();
187
+ exit(EXIT_FAILURE);
188
+ }
189
+ if (wordNgrams <= 1 && maxn == 0) {
190
+ bucket = 0;
191
+ }
192
+ }
193
+
194
+ void Args::printHelp() {
195
+ printBasicHelp();
196
+ printDictionaryHelp();
197
+ printTrainingHelp();
198
+ printQuantizationHelp();
199
+ }
200
+
201
+ void Args::printBasicHelp() {
202
+ std::cerr << "\nThe following arguments are mandatory:\n"
203
+ << " -input training file path\n"
204
+ << " -output output file path\n"
205
+ << "\nThe following arguments are optional:\n"
206
+ << " -verbose verbosity level [" << verbose << "]\n";
207
+ }
208
+
209
+ void Args::printDictionaryHelp() {
210
+ std::cerr << "\nThe following arguments for the dictionary are optional:\n"
211
+ << " -minCount minimal number of word occurences ["
212
+ << minCount << "]\n"
213
+ << " -minCountLabel minimal number of label occurences ["
214
+ << minCountLabel << "]\n"
215
+ << " -wordNgrams max length of word ngram [" << wordNgrams
216
+ << "]\n"
217
+ << " -bucket number of buckets [" << bucket << "]\n"
218
+ << " -minn min length of char ngram [" << minn
219
+ << "]\n"
220
+ << " -maxn max length of char ngram [" << maxn
221
+ << "]\n"
222
+ << " -t sampling threshold [" << t << "]\n"
223
+ << " -label labels prefix [" << label << "]\n";
224
+ }
225
+
226
+ void Args::printTrainingHelp() {
227
+ std::cerr
228
+ << "\nThe following arguments for training are optional:\n"
229
+ << " -lr learning rate [" << lr << "]\n"
230
+ << " -lrUpdateRate change the rate of updates for the learning rate ["
231
+ << lrUpdateRate << "]\n"
232
+ << " -dim size of word vectors [" << dim << "]\n"
233
+ << " -ws size of the context window [" << ws << "]\n"
234
+ << " -epoch number of epochs [" << epoch << "]\n"
235
+ << " -neg number of negatives sampled [" << neg << "]\n"
236
+ << " -loss loss function {ns, hs, softmax, one-vs-all} ["
237
+ << lossToString(loss) << "]\n"
238
+ << " -thread number of threads [" << thread << "]\n"
239
+ << " -pretrainedVectors pretrained word vectors for supervised learning ["
240
+ << pretrainedVectors << "]\n"
241
+ << " -saveOutput whether output params should be saved ["
242
+ << boolToString(saveOutput) << "]\n";
243
+ }
244
+
245
+ void Args::printQuantizationHelp() {
246
+ std::cerr
247
+ << "\nThe following arguments for quantization are optional:\n"
248
+ << " -cutoff number of words and ngrams to retain ["
249
+ << cutoff << "]\n"
250
+ << " -retrain whether embeddings are finetuned if a cutoff is applied ["
251
+ << boolToString(retrain) << "]\n"
252
+ << " -qnorm whether the norm is quantized separately ["
253
+ << boolToString(qnorm) << "]\n"
254
+ << " -qout whether the classifier is quantized ["
255
+ << boolToString(qout) << "]\n"
256
+ << " -dsub size of each sub-vector [" << dsub << "]\n";
257
+ }
258
+
259
+ void Args::save(std::ostream& out) {
260
+ out.write((char*)&(dim), sizeof(int));
261
+ out.write((char*)&(ws), sizeof(int));
262
+ out.write((char*)&(epoch), sizeof(int));
263
+ out.write((char*)&(minCount), sizeof(int));
264
+ out.write((char*)&(neg), sizeof(int));
265
+ out.write((char*)&(wordNgrams), sizeof(int));
266
+ out.write((char*)&(loss), sizeof(loss_name));
267
+ out.write((char*)&(model), sizeof(model_name));
268
+ out.write((char*)&(bucket), sizeof(int));
269
+ out.write((char*)&(minn), sizeof(int));
270
+ out.write((char*)&(maxn), sizeof(int));
271
+ out.write((char*)&(lrUpdateRate), sizeof(int));
272
+ out.write((char*)&(t), sizeof(double));
273
+ }
274
+
275
+ void Args::load(std::istream& in) {
276
+ in.read((char*)&(dim), sizeof(int));
277
+ in.read((char*)&(ws), sizeof(int));
278
+ in.read((char*)&(epoch), sizeof(int));
279
+ in.read((char*)&(minCount), sizeof(int));
280
+ in.read((char*)&(neg), sizeof(int));
281
+ in.read((char*)&(wordNgrams), sizeof(int));
282
+ in.read((char*)&(loss), sizeof(loss_name));
283
+ in.read((char*)&(model), sizeof(model_name));
284
+ in.read((char*)&(bucket), sizeof(int));
285
+ in.read((char*)&(minn), sizeof(int));
286
+ in.read((char*)&(maxn), sizeof(int));
287
+ in.read((char*)&(lrUpdateRate), sizeof(int));
288
+ in.read((char*)&(t), sizeof(double));
289
+ }
290
+
291
+ void Args::dump(std::ostream& out) const {
292
+ out << "dim"
293
+ << " " << dim << std::endl;
294
+ out << "ws"
295
+ << " " << ws << std::endl;
296
+ out << "epoch"
297
+ << " " << epoch << std::endl;
298
+ out << "minCount"
299
+ << " " << minCount << std::endl;
300
+ out << "neg"
301
+ << " " << neg << std::endl;
302
+ out << "wordNgrams"
303
+ << " " << wordNgrams << std::endl;
304
+ out << "loss"
305
+ << " " << lossToString(loss) << std::endl;
306
+ out << "model"
307
+ << " " << modelToString(model) << std::endl;
308
+ out << "bucket"
309
+ << " " << bucket << std::endl;
310
+ out << "minn"
311
+ << " " << minn << std::endl;
312
+ out << "maxn"
313
+ << " " << maxn << std::endl;
314
+ out << "lrUpdateRate"
315
+ << " " << lrUpdateRate << std::endl;
316
+ out << "t"
317
+ << " " << t << std::endl;
318
+ }
319
+
320
+ } // namespace fasttext