fasttext 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +251 -0
  5. data/ext/fasttext/ext.cpp +291 -0
  6. data/ext/fasttext/extconf.rb +15 -0
  7. data/lib/fasttext.rb +41 -0
  8. data/lib/fasttext/classifier.rb +92 -0
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/lib/fasttext/model.rb +60 -0
  11. data/lib/fasttext/vectorizer.rb +58 -0
  12. data/lib/fasttext/version.rb +3 -0
  13. data/vendor/fastText/CMakeLists.txt +68 -0
  14. data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
  15. data/vendor/fastText/CONTRIBUTING.md +32 -0
  16. data/vendor/fastText/LICENSE +21 -0
  17. data/vendor/fastText/MANIFEST.in +5 -0
  18. data/vendor/fastText/Makefile +63 -0
  19. data/vendor/fastText/README.md +339 -0
  20. data/vendor/fastText/alignment/README.md +53 -0
  21. data/vendor/fastText/alignment/align.py +145 -0
  22. data/vendor/fastText/alignment/eval.py +60 -0
  23. data/vendor/fastText/alignment/example.sh +51 -0
  24. data/vendor/fastText/alignment/unsup_align.py +109 -0
  25. data/vendor/fastText/alignment/utils.py +154 -0
  26. data/vendor/fastText/classification-example.sh +41 -0
  27. data/vendor/fastText/classification-results.sh +94 -0
  28. data/vendor/fastText/crawl/README.md +26 -0
  29. data/vendor/fastText/crawl/dedup.cc +51 -0
  30. data/vendor/fastText/crawl/download_crawl.sh +57 -0
  31. data/vendor/fastText/crawl/filter_dedup.sh +13 -0
  32. data/vendor/fastText/crawl/filter_utf8.cc +105 -0
  33. data/vendor/fastText/crawl/process_wet_file.sh +30 -0
  34. data/vendor/fastText/docs/aligned-vectors.md +64 -0
  35. data/vendor/fastText/docs/api.md +6 -0
  36. data/vendor/fastText/docs/cheatsheet.md +66 -0
  37. data/vendor/fastText/docs/crawl-vectors.md +125 -0
  38. data/vendor/fastText/docs/dataset.md +6 -0
  39. data/vendor/fastText/docs/english-vectors.md +53 -0
  40. data/vendor/fastText/docs/faqs.md +63 -0
  41. data/vendor/fastText/docs/language-identification.md +47 -0
  42. data/vendor/fastText/docs/options.md +50 -0
  43. data/vendor/fastText/docs/pretrained-vectors.md +142 -0
  44. data/vendor/fastText/docs/python-module.md +314 -0
  45. data/vendor/fastText/docs/references.md +41 -0
  46. data/vendor/fastText/docs/supervised-models.md +54 -0
  47. data/vendor/fastText/docs/supervised-tutorial.md +349 -0
  48. data/vendor/fastText/docs/support.md +58 -0
  49. data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
  50. data/vendor/fastText/eval.py +95 -0
  51. data/vendor/fastText/get-wikimedia.sh +79 -0
  52. data/vendor/fastText/python/README.md +322 -0
  53. data/vendor/fastText/python/README.rst +406 -0
  54. data/vendor/fastText/python/benchmarks/README.rst +3 -0
  55. data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
  56. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
  57. data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
  58. data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
  59. data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
  60. data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
  61. data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
  62. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
  63. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
  64. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
  65. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
  66. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
  67. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
  68. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
  69. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
  70. data/vendor/fastText/quantization-example.sh +40 -0
  71. data/vendor/fastText/runtests.py +60 -0
  72. data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
  73. data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
  74. data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
  75. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
  76. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
  77. data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
  78. data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
  79. data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
  80. data/vendor/fastText/setup.cfg +2 -0
  81. data/vendor/fastText/setup.py +203 -0
  82. data/vendor/fastText/src/args.cc +320 -0
  83. data/vendor/fastText/src/args.h +68 -0
  84. data/vendor/fastText/src/densematrix.cc +155 -0
  85. data/vendor/fastText/src/densematrix.h +75 -0
  86. data/vendor/fastText/src/dictionary.cc +540 -0
  87. data/vendor/fastText/src/dictionary.h +111 -0
  88. data/vendor/fastText/src/fasttext.cc +821 -0
  89. data/vendor/fastText/src/fasttext.h +191 -0
  90. data/vendor/fastText/src/loss.cc +346 -0
  91. data/vendor/fastText/src/loss.h +163 -0
  92. data/vendor/fastText/src/main.cc +435 -0
  93. data/vendor/fastText/src/matrix.cc +25 -0
  94. data/vendor/fastText/src/matrix.h +44 -0
  95. data/vendor/fastText/src/meter.cc +68 -0
  96. data/vendor/fastText/src/meter.h +69 -0
  97. data/vendor/fastText/src/model.cc +98 -0
  98. data/vendor/fastText/src/model.h +79 -0
  99. data/vendor/fastText/src/productquantizer.cc +251 -0
  100. data/vendor/fastText/src/productquantizer.h +63 -0
  101. data/vendor/fastText/src/quantmatrix.cc +117 -0
  102. data/vendor/fastText/src/quantmatrix.h +60 -0
  103. data/vendor/fastText/src/real.h +15 -0
  104. data/vendor/fastText/src/utils.cc +28 -0
  105. data/vendor/fastText/src/utils.h +43 -0
  106. data/vendor/fastText/src/vector.cc +97 -0
  107. data/vendor/fastText/src/vector.h +61 -0
  108. data/vendor/fastText/tests/fetch_test_data.sh +202 -0
  109. data/vendor/fastText/website/README.md +6 -0
  110. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
  111. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
  112. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
  113. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
  114. data/vendor/fastText/website/core/Footer.js +127 -0
  115. data/vendor/fastText/website/package.json +12 -0
  116. data/vendor/fastText/website/pages/en/index.js +286 -0
  117. data/vendor/fastText/website/sidebars.json +18 -0
  118. data/vendor/fastText/website/siteConfig.js +102 -0
  119. data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
  120. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
  121. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
  122. data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
  123. data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
  124. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
  125. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  126. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  127. data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
  141. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
  142. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
  143. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
  144. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
  145. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
  146. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
  147. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
  148. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
  149. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
  150. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
  151. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
  152. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  153. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
  154. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
  155. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
  156. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
  157. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
  158. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
  159. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
  161. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  162. data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
  163. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
  164. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
  165. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
  166. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
  167. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  168. data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
  169. data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
  170. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  171. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  172. data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
  173. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
  174. data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
  175. data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
  176. data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
  177. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
  178. data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
  179. data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
  180. data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
  181. data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
  182. data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
  183. data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
  184. data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
  185. data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
  186. data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
  187. data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
  188. data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
  189. data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
  190. data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
  191. data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
  192. data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
  193. data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
  194. data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
  195. data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
  196. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
  197. data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
  198. data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
  199. data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
  200. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
  201. data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
  202. data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
  203. data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
  204. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
  205. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
  206. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
  207. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
  208. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
  209. data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
  210. data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
  211. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
  212. data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
  213. data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
  214. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
  215. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
  216. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
  217. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
  218. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
  219. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
  220. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
  221. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
  222. data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
  223. data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
  224. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  225. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  226. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  227. data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
  228. data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
  229. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
  230. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
  231. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
  232. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  233. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
  234. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
  235. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
  236. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
  237. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
  238. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
  239. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
  240. data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
  241. data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
  242. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
  243. data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
  244. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
  245. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
  246. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
  247. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
  248. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
  249. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
  250. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
  251. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
  252. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
  253. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
  254. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
  255. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
  256. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
  257. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
  258. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
  259. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
  260. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
  261. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
  262. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
  263. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
  264. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
  265. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
  266. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
  267. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
  268. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
  269. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
  270. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
  271. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
  272. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
  273. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
  274. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
  275. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
  276. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
  277. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
  278. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
  279. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
  281. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
  282. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
  283. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
  284. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
  285. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
  286. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
  287. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
  288. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
  289. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
  290. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
  291. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
  299. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
  301. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
  302. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
  303. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
  304. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
  305. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
  306. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
  307. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
  308. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
  309. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
  310. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  311. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
  312. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
  313. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
  314. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
  315. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
  316. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
  317. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
  318. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
  319. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
  320. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
  321. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
  322. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
  323. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
  324. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
  326. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
  327. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
  328. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
  329. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
  330. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
  331. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
  332. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
  333. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
  334. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
  335. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
  336. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
  337. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
  338. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
  339. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
  340. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
  341. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
  342. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
  343. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
  344. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
  345. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
  346. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
  347. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
  348. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
  349. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
  350. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
  351. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
  352. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
  353. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
  354. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
  392. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
  394. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
  395. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
  396. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
  397. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
  398. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
  402. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
  403. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  404. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
  405. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
  406. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
  407. data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
  408. data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
  409. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  410. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  411. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  412. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
  413. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
  414. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
  415. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
  416. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
  446. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
  447. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
  448. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
  449. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
  450. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
  451. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
  452. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
  453. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
  454. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
  455. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
  456. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
  457. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
  459. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
  460. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
  461. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
  462. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
  463. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
  464. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  465. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  466. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  467. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  468. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  469. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  470. data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
  471. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
  472. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
  473. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
  474. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
  475. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
  476. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
  477. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
  478. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
  479. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
  480. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
  481. data/vendor/fastText/website/static/fasttext.css +48 -0
  482. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  483. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  484. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  485. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  486. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  487. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  488. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  489. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  490. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  491. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  492. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  493. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  494. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  495. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  496. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  497. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  498. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  499. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  500. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  501. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  502. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  503. data/vendor/fastText/website/static/img/model-black.png +0 -0
  504. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  505. data/vendor/fastText/website/static/img/model-red.png +0 -0
  506. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  507. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  508. data/vendor/fastText/wikifil.pl +57 -0
  509. data/vendor/fastText/word-vector-example.sh +39 -0
  510. metadata +621 -0
@@ -0,0 +1,163 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #pragma once
10
+
11
+ #include <memory>
12
+ #include <random>
13
+ #include <vector>
14
+
15
+ #include "matrix.h"
16
+ #include "model.h"
17
+ #include "real.h"
18
+ #include "utils.h"
19
+ #include "vector.h"
20
+
21
+ namespace fasttext {
22
+
23
+ class Loss {
24
+ private:
25
+ void findKBest(
26
+ int32_t k,
27
+ real threshold,
28
+ Predictions& heap,
29
+ const Vector& output) const;
30
+
31
+ protected:
32
+ std::vector<real> t_sigmoid_;
33
+ std::vector<real> t_log_;
34
+ std::shared_ptr<Matrix>& wo_;
35
+
36
+ real log(real x) const;
37
+ real sigmoid(real x) const;
38
+
39
+ public:
40
+ explicit Loss(std::shared_ptr<Matrix>& wo);
41
+ virtual ~Loss() = default;
42
+
43
+ virtual real forward(
44
+ const std::vector<int32_t>& targets,
45
+ int32_t targetIndex,
46
+ Model::State& state,
47
+ real lr,
48
+ bool backprop) = 0;
49
+ virtual void computeOutput(Model::State& state) const = 0;
50
+
51
+ virtual void predict(
52
+ int32_t /*k*/,
53
+ real /*threshold*/,
54
+ Predictions& /*heap*/,
55
+ Model::State& /*state*/) const;
56
+ };
57
+
58
+ class BinaryLogisticLoss : public Loss {
59
+ protected:
60
+ real binaryLogistic(
61
+ int32_t target,
62
+ Model::State& state,
63
+ bool labelIsPositive,
64
+ real lr,
65
+ bool backprop) const;
66
+
67
+ public:
68
+ explicit BinaryLogisticLoss(std::shared_ptr<Matrix>& wo);
69
+ virtual ~BinaryLogisticLoss() noexcept override = default;
70
+ void computeOutput(Model::State& state) const override;
71
+ };
72
+
73
+ class OneVsAllLoss : public BinaryLogisticLoss {
74
+ public:
75
+ explicit OneVsAllLoss(std::shared_ptr<Matrix>& wo);
76
+ ~OneVsAllLoss() noexcept override = default;
77
+ real forward(
78
+ const std::vector<int32_t>& targets,
79
+ int32_t targetIndex,
80
+ Model::State& state,
81
+ real lr,
82
+ bool backprop) override;
83
+ };
84
+
85
+ class NegativeSamplingLoss : public BinaryLogisticLoss {
86
+ protected:
87
+ static const int32_t NEGATIVE_TABLE_SIZE = 10000000;
88
+
89
+ int neg_;
90
+ std::vector<int32_t> negatives_;
91
+ std::uniform_int_distribution<size_t> uniform_;
92
+ int32_t getNegative(int32_t target, std::minstd_rand& rng);
93
+
94
+ public:
95
+ explicit NegativeSamplingLoss(
96
+ std::shared_ptr<Matrix>& wo,
97
+ int neg,
98
+ const std::vector<int64_t>& targetCounts);
99
+ ~NegativeSamplingLoss() noexcept override = default;
100
+
101
+ real forward(
102
+ const std::vector<int32_t>& targets,
103
+ int32_t targetIndex,
104
+ Model::State& state,
105
+ real lr,
106
+ bool backprop) override;
107
+ };
108
+
109
+ class HierarchicalSoftmaxLoss : public BinaryLogisticLoss {
110
+ protected:
111
+ struct Node {
112
+ int32_t parent;
113
+ int32_t left;
114
+ int32_t right;
115
+ int64_t count;
116
+ bool binary;
117
+ };
118
+
119
+ std::vector<std::vector<int32_t>> paths_;
120
+ std::vector<std::vector<bool>> codes_;
121
+ std::vector<Node> tree_;
122
+ int32_t osz_;
123
+ void buildTree(const std::vector<int64_t>& counts);
124
+ void dfs(
125
+ int32_t k,
126
+ real threshold,
127
+ int32_t node,
128
+ real score,
129
+ Predictions& heap,
130
+ const Vector& hidden) const;
131
+
132
+ public:
133
+ explicit HierarchicalSoftmaxLoss(
134
+ std::shared_ptr<Matrix>& wo,
135
+ const std::vector<int64_t>& counts);
136
+ ~HierarchicalSoftmaxLoss() noexcept override = default;
137
+ real forward(
138
+ const std::vector<int32_t>& targets,
139
+ int32_t targetIndex,
140
+ Model::State& state,
141
+ real lr,
142
+ bool backprop) override;
143
+ void predict(
144
+ int32_t k,
145
+ real threshold,
146
+ Predictions& heap,
147
+ Model::State& state) const override;
148
+ };
149
+
150
+ class SoftmaxLoss : public Loss {
151
+ public:
152
+ explicit SoftmaxLoss(std::shared_ptr<Matrix>& wo);
153
+ ~SoftmaxLoss() noexcept override = default;
154
+ real forward(
155
+ const std::vector<int32_t>& targets,
156
+ int32_t targetIndex,
157
+ Model::State& state,
158
+ real lr,
159
+ bool backprop) override;
160
+ void computeOutput(Model::State& state) const override;
161
+ };
162
+
163
+ } // namespace fasttext
@@ -0,0 +1,435 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #include <iomanip>
10
+ #include <iostream>
11
+ #include <queue>
12
+ #include <stdexcept>
13
+ #include "args.h"
14
+ #include "fasttext.h"
15
+
16
+ using namespace fasttext;
17
+
18
/** Writes the top-level command overview to stderr, followed by a blank line. */
void printUsage() {
  static const char* const kLines[] = {
      "usage: fasttext <command> <args>\n\n",
      "The commands supported by fasttext are:\n\n",
      " supervised train a supervised classifier\n",
      " quantize quantize a model to reduce the memory usage\n",
      " test evaluate a supervised classifier\n",
      " test-label print labels with precision and recall scores\n",
      " predict predict most likely labels\n",
      " predict-prob predict most likely labels with probabilities\n",
      " skipgram train a skipgram model\n",
      " cbow train a cbow model\n",
      " print-word-vectors print word vectors given a trained model\n",
      " print-sentence-vectors print sentence vectors given a trained model\n",
      " print-ngrams print ngrams given a trained model and word\n",
      " nn query for nearest neighbors\n",
      " analogies query for analogies\n",
      " dump dump arguments,dictionary,input/output vectors\n",
  };
  for (const char* line : kLines) {
    std::cerr << line;
  }
  std::cerr << std::endl;
}
38
+
39
/** Writes the one-line `quantize` usage banner to stderr. */
void printQuantizeUsage() {
  static const char kUsage[] = "usage: fasttext quantize <args>";
  std::cerr << kUsage << std::endl;
}
42
+
43
/** Writes the `test` command usage text to stderr. */
void printTestUsage() {
  static const char kUsage[] =
      "usage: fasttext test <model> <test-data> [<k>] [<th>]\n\n"
      " <model> model filename\n"
      " <test-data> test data filename (if -, read from stdin)\n"
      " <k> (optional; 1 by default) predict top k labels\n"
      " <th> (optional; 0.0 by default) probability threshold\n";
  std::cerr << kUsage << std::endl;
}
52
+
53
/** Writes the shared `predict` / `predict-prob` usage text to stderr. */
void printPredictUsage() {
  static const char kUsage[] =
      "usage: fasttext predict[-prob] <model> <test-data> [<k>] [<th>]\n\n"
      " <model> model filename\n"
      " <test-data> test data filename (if -, read from stdin)\n"
      " <k> (optional; 1 by default) predict top k labels\n"
      " <th> (optional; 0.0 by default) probability threshold\n";
  std::cerr << kUsage << std::endl;
}
62
+
63
/** Writes the `test-label` command usage text to stderr. */
void printTestLabelUsage() {
  static const char kUsage[] =
      "usage: fasttext test-label <model> <test-data> [<k>] [<th>]\n\n"
      " <model> model filename\n"
      " <test-data> test data filename\n"
      " <k> (optional; 1 by default) predict top k labels\n"
      " <th> (optional; 0.0 by default) probability threshold\n";
  std::cerr << kUsage << std::endl;
}
72
+
73
/** Writes the `print-word-vectors` usage text to stderr. */
void printPrintWordVectorsUsage() {
  static const char kUsage[] =
      "usage: fasttext print-word-vectors <model>\n\n"
      " <model> model filename\n";
  std::cerr << kUsage << std::endl;
}
78
+
79
/** Writes the `print-sentence-vectors` usage text to stderr. */
void printPrintSentenceVectorsUsage() {
  static const char kUsage[] =
      "usage: fasttext print-sentence-vectors <model>\n\n"
      " <model> model filename\n";
  std::cerr << kUsage << std::endl;
}
84
+
85
/** Writes the `print-ngrams` usage text to stderr. */
void printPrintNgramsUsage() {
  static const char kUsage[] =
      "usage: fasttext print-ngrams <model> <word>\n\n"
      " <model> model filename\n"
      " <word> word to print\n";
  std::cerr << kUsage << std::endl;
}
91
+
92
+ void quantize(const std::vector<std::string>& args) {
93
+ Args a = Args();
94
+ if (args.size() < 3) {
95
+ printQuantizeUsage();
96
+ a.printHelp();
97
+ exit(EXIT_FAILURE);
98
+ }
99
+ a.parseArgs(args);
100
+ FastText fasttext;
101
+ // parseArgs checks if a->output is given.
102
+ fasttext.loadModel(a.output + ".bin");
103
+ fasttext.quantize(a);
104
+ fasttext.saveModel(a.output + ".ftz");
105
+ exit(0);
106
+ }
107
+
108
/**
 * Writes the `nn` command usage text to stderr.
 *
 * Only called on the argument-error path, right before
 * exit(EXIT_FAILURE). It therefore uses the error stream, like the other
 * usage printers in this file, so the message cannot end up in
 * redirected standard output.
 */
void printNNUsage() {
  std::cerr << "usage: fasttext nn <model> <k>\n\n"
            << " <model> model filename\n"
            << " <k> (optional; 10 by default) predict top k labels\n"
            << std::endl;
}
114
+
115
/**
 * Writes the `analogies` command usage text to stderr.
 *
 * Only called on the argument-error path, right before
 * exit(EXIT_FAILURE). It therefore uses the error stream, like the other
 * usage printers in this file.
 */
void printAnalogiesUsage() {
  std::cerr << "usage: fasttext analogies <model> <k>\n\n"
            << " <model> model filename\n"
            << " <k> (optional; 10 by default) predict top k labels\n"
            << std::endl;
}
121
+
122
/**
 * Writes the `dump` command usage text to stderr.
 *
 * Only called on the argument-error path, right before
 * exit(EXIT_FAILURE). Using the error stream keeps the message out of
 * standard output, which is where `dump` writes its actual data.
 */
void printDumpUsage() {
  std::cerr << "usage: fasttext dump <model> <option>\n\n"
            << " <model> model filename\n"
            << " <option> option from args,dict,input,output" << std::endl;
}
127
+
128
+ void test(const std::vector<std::string>& args) {
129
+ bool perLabel = args[1] == "test-label";
130
+
131
+ if (args.size() < 4 || args.size() > 6) {
132
+ perLabel ? printTestLabelUsage() : printTestUsage();
133
+ exit(EXIT_FAILURE);
134
+ }
135
+
136
+ const auto& model = args[2];
137
+ const auto& input = args[3];
138
+ int32_t k = args.size() > 4 ? std::stoi(args[4]) : 1;
139
+ real threshold = args.size() > 5 ? std::stof(args[5]) : 0.0;
140
+
141
+ FastText fasttext;
142
+ fasttext.loadModel(model);
143
+
144
+ Meter meter;
145
+
146
+ if (input == "-") {
147
+ fasttext.test(std::cin, k, threshold, meter);
148
+ } else {
149
+ std::ifstream ifs(input);
150
+ if (!ifs.is_open()) {
151
+ std::cerr << "Test file cannot be opened!" << std::endl;
152
+ exit(EXIT_FAILURE);
153
+ }
154
+ fasttext.test(ifs, k, threshold, meter);
155
+ }
156
+
157
+ if (perLabel) {
158
+ std::cout << std::fixed << std::setprecision(6);
159
+ auto writeMetric = [](const std::string& name, double value) {
160
+ std::cout << name << " : ";
161
+ if (std::isfinite(value)) {
162
+ std::cout << value;
163
+ } else {
164
+ std::cout << "--------";
165
+ }
166
+ std::cout << " ";
167
+ };
168
+
169
+ std::shared_ptr<const Dictionary> dict = fasttext.getDictionary();
170
+ for (int32_t labelId = 0; labelId < dict->nlabels(); labelId++) {
171
+ writeMetric("F1-Score", meter.f1Score(labelId));
172
+ writeMetric("Precision", meter.precision(labelId));
173
+ writeMetric("Recall", meter.recall(labelId));
174
+ std::cout << " " << dict->getLabel(labelId) << std::endl;
175
+ }
176
+ }
177
+ meter.writeGeneralMetrics(std::cout, k);
178
+
179
+ exit(0);
180
+ }
181
+
182
+ void printPredictions(
183
+ const std::vector<std::pair<real, std::string>>& predictions,
184
+ bool printProb,
185
+ bool multiline) {
186
+ bool first = true;
187
+ for (const auto& prediction : predictions) {
188
+ if (!first && !multiline) {
189
+ std::cout << " ";
190
+ }
191
+ first = false;
192
+ std::cout << prediction.second;
193
+ if (printProb) {
194
+ std::cout << " " << prediction.first;
195
+ }
196
+ if (multiline) {
197
+ std::cout << std::endl;
198
+ }
199
+ }
200
+ if (!multiline) {
201
+ std::cout << std::endl;
202
+ }
203
+ }
204
+
205
+ void predict(const std::vector<std::string>& args) {
206
+ if (args.size() < 4 || args.size() > 6) {
207
+ printPredictUsage();
208
+ exit(EXIT_FAILURE);
209
+ }
210
+ int32_t k = 1;
211
+ real threshold = 0.0;
212
+ if (args.size() > 4) {
213
+ k = std::stoi(args[4]);
214
+ if (args.size() == 6) {
215
+ threshold = std::stof(args[5]);
216
+ }
217
+ }
218
+
219
+ bool printProb = args[1] == "predict-prob";
220
+ FastText fasttext;
221
+ fasttext.loadModel(std::string(args[2]));
222
+
223
+ std::ifstream ifs;
224
+ std::string infile(args[3]);
225
+ bool inputIsStdIn = infile == "-";
226
+ if (!inputIsStdIn) {
227
+ ifs.open(infile);
228
+ if (!inputIsStdIn && !ifs.is_open()) {
229
+ std::cerr << "Input file cannot be opened!" << std::endl;
230
+ exit(EXIT_FAILURE);
231
+ }
232
+ }
233
+ std::istream& in = inputIsStdIn ? std::cin : ifs;
234
+ std::vector<std::pair<real, std::string>> predictions;
235
+ while (fasttext.predictLine(in, predictions, k, threshold)) {
236
+ printPredictions(predictions, printProb, false);
237
+ }
238
+ if (ifs.is_open()) {
239
+ ifs.close();
240
+ }
241
+
242
+ exit(0);
243
+ }
244
+
245
+ void printWordVectors(const std::vector<std::string> args) {
246
+ if (args.size() != 3) {
247
+ printPrintWordVectorsUsage();
248
+ exit(EXIT_FAILURE);
249
+ }
250
+ FastText fasttext;
251
+ fasttext.loadModel(std::string(args[2]));
252
+ std::string word;
253
+ Vector vec(fasttext.getDimension());
254
+ while (std::cin >> word) {
255
+ fasttext.getWordVector(vec, word);
256
+ std::cout << word << " " << vec << std::endl;
257
+ }
258
+ exit(0);
259
+ }
260
+
261
+ void printSentenceVectors(const std::vector<std::string> args) {
262
+ if (args.size() != 3) {
263
+ printPrintSentenceVectorsUsage();
264
+ exit(EXIT_FAILURE);
265
+ }
266
+ FastText fasttext;
267
+ fasttext.loadModel(std::string(args[2]));
268
+ Vector svec(fasttext.getDimension());
269
+ while (std::cin.peek() != EOF) {
270
+ fasttext.getSentenceVector(std::cin, svec);
271
+ // Don't print sentence
272
+ std::cout << svec << std::endl;
273
+ }
274
+ exit(0);
275
+ }
276
+
277
+ void printNgrams(const std::vector<std::string> args) {
278
+ if (args.size() != 4) {
279
+ printPrintNgramsUsage();
280
+ exit(EXIT_FAILURE);
281
+ }
282
+ FastText fasttext;
283
+ fasttext.loadModel(std::string(args[2]));
284
+
285
+ std::string word(args[3]);
286
+ std::vector<std::pair<std::string, Vector>> ngramVectors =
287
+ fasttext.getNgramVectors(word);
288
+
289
+ for (const auto& ngramVector : ngramVectors) {
290
+ std::cout << ngramVector.first << " " << ngramVector.second << std::endl;
291
+ }
292
+
293
+ exit(0);
294
+ }
295
+
296
+ void nn(const std::vector<std::string> args) {
297
+ int32_t k;
298
+ if (args.size() == 3) {
299
+ k = 10;
300
+ } else if (args.size() == 4) {
301
+ k = std::stoi(args[3]);
302
+ } else {
303
+ printNNUsage();
304
+ exit(EXIT_FAILURE);
305
+ }
306
+ FastText fasttext;
307
+ fasttext.loadModel(std::string(args[2]));
308
+ std::string prompt("Query word? ");
309
+ std::cout << prompt;
310
+
311
+ std::string queryWord;
312
+ while (std::cin >> queryWord) {
313
+ printPredictions(fasttext.getNN(queryWord, k), true, true);
314
+ std::cout << prompt;
315
+ }
316
+ exit(0);
317
+ }
318
+
319
+ void analogies(const std::vector<std::string> args) {
320
+ int32_t k;
321
+ if (args.size() == 3) {
322
+ k = 10;
323
+ } else if (args.size() == 4) {
324
+ k = std::stoi(args[3]);
325
+ } else {
326
+ printAnalogiesUsage();
327
+ exit(EXIT_FAILURE);
328
+ }
329
+ if (k <= 0) {
330
+ throw std::invalid_argument("k needs to be 1 or higher!");
331
+ }
332
+ FastText fasttext;
333
+ std::string model(args[2]);
334
+ std::cout << "Loading model " << model << std::endl;
335
+ fasttext.loadModel(model);
336
+
337
+ std::string prompt("Query triplet (A - B + C)? ");
338
+ std::string wordA, wordB, wordC;
339
+ std::cout << prompt;
340
+ while (true) {
341
+ std::cin >> wordA;
342
+ std::cin >> wordB;
343
+ std::cin >> wordC;
344
+ printPredictions(fasttext.getAnalogies(k, wordA, wordB, wordC), true, true);
345
+
346
+ std::cout << prompt;
347
+ }
348
+ exit(0);
349
+ }
350
+
351
+ void train(const std::vector<std::string> args) {
352
+ Args a = Args();
353
+ a.parseArgs(args);
354
+ FastText fasttext;
355
+ std::string outputFileName(a.output + ".bin");
356
+ std::ofstream ofs(outputFileName);
357
+ if (!ofs.is_open()) {
358
+ throw std::invalid_argument(
359
+ outputFileName + " cannot be opened for saving.");
360
+ }
361
+ ofs.close();
362
+ fasttext.train(a);
363
+ fasttext.saveModel(outputFileName);
364
+ fasttext.saveVectors(a.output + ".vec");
365
+ if (a.saveOutput) {
366
+ fasttext.saveOutput(a.output + ".output");
367
+ }
368
+ }
369
+
370
+ void dump(const std::vector<std::string>& args) {
371
+ if (args.size() < 4) {
372
+ printDumpUsage();
373
+ exit(EXIT_FAILURE);
374
+ }
375
+
376
+ std::string modelPath = args[2];
377
+ std::string option = args[3];
378
+
379
+ FastText fasttext;
380
+ fasttext.loadModel(modelPath);
381
+ if (option == "args") {
382
+ fasttext.getArgs().dump(std::cout);
383
+ } else if (option == "dict") {
384
+ fasttext.getDictionary()->dump(std::cout);
385
+ } else if (option == "input") {
386
+ if (fasttext.isQuant()) {
387
+ std::cerr << "Not supported for quantized models." << std::endl;
388
+ } else {
389
+ fasttext.getInputMatrix()->dump(std::cout);
390
+ }
391
+ } else if (option == "output") {
392
+ if (fasttext.isQuant()) {
393
+ std::cerr << "Not supported for quantized models." << std::endl;
394
+ } else {
395
+ fasttext.getOutputMatrix()->dump(std::cout);
396
+ }
397
+ } else {
398
+ printDumpUsage();
399
+ exit(EXIT_FAILURE);
400
+ }
401
+ }
402
+
403
+ int main(int argc, char** argv) {
404
+ std::vector<std::string> args(argv, argv + argc);
405
+ if (args.size() < 2) {
406
+ printUsage();
407
+ exit(EXIT_FAILURE);
408
+ }
409
+ std::string command(args[1]);
410
+ if (command == "skipgram" || command == "cbow" || command == "supervised") {
411
+ train(args);
412
+ } else if (command == "test" || command == "test-label") {
413
+ test(args);
414
+ } else if (command == "quantize") {
415
+ quantize(args);
416
+ } else if (command == "print-word-vectors") {
417
+ printWordVectors(args);
418
+ } else if (command == "print-sentence-vectors") {
419
+ printSentenceVectors(args);
420
+ } else if (command == "print-ngrams") {
421
+ printNgrams(args);
422
+ } else if (command == "nn") {
423
+ nn(args);
424
+ } else if (command == "analogies") {
425
+ analogies(args);
426
+ } else if (command == "predict" || command == "predict-prob") {
427
+ predict(args);
428
+ } else if (command == "dump") {
429
+ dump(args);
430
+ } else {
431
+ printUsage();
432
+ exit(EXIT_FAILURE);
433
+ }
434
+ return 0;
435
+ }