fasttext 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (498) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #pragma once
10
+
11
+ #include <istream>
12
+ #include <memory>
13
+ #include <random>
14
+ #include <thread>
15
+ #include <vector>
16
+
17
+ #include "args.h"
18
+ #include "fasttext.h"
19
+
20
+ namespace fasttext {
21
+
22
+ class AutotuneStrategy {
23
+ private:
24
+ Args bestArgs_;
25
+ int maxDuration_;
26
+ std::minstd_rand rng_;
27
+ int trials_;
28
+ int bestMinnIndex_;
29
+ int bestDsubExponent_;
30
+ int bestNonzeroBucket_;
31
+ int originalBucket_;
32
+ std::vector<int> minnChoices_;
33
+ int getIndex(int val, const std::vector<int>& choices);
34
+
35
+ public:
36
+ explicit AutotuneStrategy(
37
+ const Args& args,
38
+ std::minstd_rand::result_type seed);
39
+ Args ask(double elapsed);
40
+ void updateBest(const Args& args);
41
+ };
42
+
43
+ class Autotune {
44
+ protected:
45
+ std::shared_ptr<FastText> fastText_;
46
+ double elapsed_;
47
+ double bestScore_;
48
+ int32_t trials_;
49
+ int32_t sizeConstraintFailed_;
50
+ std::atomic<bool> continueTraining_;
51
+ std::unique_ptr<AutotuneStrategy> strategy_;
52
+ std::thread timer_;
53
+
54
+ bool keepTraining(double maxDuration) const;
55
+ void printInfo(double maxDuration);
56
+ void timer(
57
+ const std::chrono::steady_clock::time_point& start,
58
+ double maxDuration);
59
+ void abort();
60
+ void startTimer(const Args& args);
61
+ double getMetricScore(
62
+ Meter& meter,
63
+ const metric_name& metricName,
64
+ const double metricValue,
65
+ const std::string& metricLabel) const;
66
+ void printArgs(const Args& args, const Args& autotuneArgs);
67
+ void printSkippedArgs(const Args& autotuneArgs);
68
+ bool quantize(Args& args, const Args& autotuneArgs);
69
+ int getCutoffForFileSize(bool qout, bool qnorm, int dsub, int64_t fileSize)
70
+ const;
71
+
72
+ class TimeoutError : public std::runtime_error {
73
+ public:
74
+ TimeoutError() : std::runtime_error("Autotune timed out.") {}
75
+ };
76
+
77
+ public:
78
+ Autotune() = delete;
79
+ explicit Autotune(const std::shared_ptr<FastText>& fastText);
80
+ Autotune(const Autotune&) = delete;
81
+ Autotune(Autotune&&) = delete;
82
+ Autotune& operator=(const Autotune&) = delete;
83
+ Autotune& operator=(Autotune&&) = delete;
84
+ ~Autotune() noexcept = default;
85
+
86
+ void train(const Args& args);
87
+ };
88
+
89
+ } // namespace fasttext
@@ -8,11 +8,10 @@
8
8
 
9
9
  #include "densematrix.h"
10
10
 
11
- #include <exception>
12
11
  #include <random>
13
12
  #include <stdexcept>
13
+ #include <thread>
14
14
  #include <utility>
15
-
16
15
  #include "utils.h"
17
16
  #include "vector.h"
18
17
 
@@ -25,18 +24,39 @@ DenseMatrix::DenseMatrix(int64_t m, int64_t n) : Matrix(m, n), data_(m * n) {}
25
24
  DenseMatrix::DenseMatrix(DenseMatrix&& other) noexcept
26
25
  : Matrix(other.m_, other.n_), data_(std::move(other.data_)) {}
27
26
 
27
+ DenseMatrix::DenseMatrix(int64_t m, int64_t n, real* dataPtr)
28
+ : Matrix(m, n), data_(dataPtr, dataPtr + (m * n)) {}
29
+
28
30
  void DenseMatrix::zero() {
29
31
  std::fill(data_.begin(), data_.end(), 0.0);
30
32
  }
31
33
 
32
- void DenseMatrix::uniform(real a) {
33
- std::minstd_rand rng(1);
34
+ void DenseMatrix::uniformThread(real a, int block, int32_t seed) {
35
+ std::minstd_rand rng(block + seed);
34
36
  std::uniform_real_distribution<> uniform(-a, a);
35
- for (int64_t i = 0; i < (m_ * n_); i++) {
37
+ int64_t blockSize = (m_ * n_) / 10;
38
+ for (int64_t i = blockSize * block;
39
+ i < (m_ * n_) && i < blockSize * (block + 1);
40
+ i++) {
36
41
  data_[i] = uniform(rng);
37
42
  }
38
43
  }
39
44
 
45
+ void DenseMatrix::uniform(real a, unsigned int thread, int32_t seed) {
46
+ if (thread > 1) {
47
+ std::vector<std::thread> threads;
48
+ for (int i = 0; i < thread; i++) {
49
+ threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
50
+ }
51
+ for (int32_t i = 0; i < threads.size(); i++) {
52
+ threads[i].join();
53
+ }
54
+ } else {
55
+ // webassembly can't instantiate `std::thread`
56
+ uniformThread(a, 0, seed);
57
+ }
58
+ }
59
+
40
60
  void DenseMatrix::multiplyRow(const Vector& nums, int64_t ib, int64_t ie) {
41
61
  if (ie == -1) {
42
62
  ie = m_;
@@ -73,7 +93,7 @@ real DenseMatrix::l2NormRow(int64_t i) const {
73
93
  norm += at(i, j) * at(i, j);
74
94
  }
75
95
  if (std::isnan(norm)) {
76
- throw std::runtime_error("Encountered NaN.");
96
+ throw EncounteredNaNError();
77
97
  }
78
98
  return std::sqrt(norm);
79
99
  }
@@ -94,7 +114,7 @@ real DenseMatrix::dotRow(const Vector& vec, int64_t i) const {
94
114
  d += at(i, j) * vec[j];
95
115
  }
96
116
  if (std::isnan(d)) {
97
- throw std::runtime_error("Encountered NaN.");
117
+ throw EncounteredNaNError();
98
118
  }
99
119
  return d;
100
120
  }
@@ -8,12 +8,13 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <assert.h>
11
12
  #include <cstdint>
12
13
  #include <istream>
13
14
  #include <ostream>
15
+ #include <stdexcept>
14
16
  #include <vector>
15
17
 
16
- #include <assert.h>
17
18
  #include "matrix.h"
18
19
  #include "real.h"
19
20
 
@@ -24,10 +25,12 @@ class Vector;
24
25
  class DenseMatrix : public Matrix {
25
26
  protected:
26
27
  std::vector<real> data_;
28
+ void uniformThread(real, int, int32_t);
27
29
 
28
30
  public:
29
31
  DenseMatrix();
30
32
  explicit DenseMatrix(int64_t, int64_t);
33
+ explicit DenseMatrix(int64_t m, int64_t n, real* dataPtr);
31
34
  DenseMatrix(const DenseMatrix&) = default;
32
35
  DenseMatrix(DenseMatrix&&) noexcept;
33
36
  DenseMatrix& operator=(const DenseMatrix&) = delete;
@@ -56,7 +59,7 @@ class DenseMatrix : public Matrix {
56
59
  return n_;
57
60
  }
58
61
  void zero();
59
- void uniform(real);
62
+ void uniform(real, unsigned int, int32_t);
60
63
 
61
64
  void multiplyRow(const Vector& nums, int64_t ib = 0, int64_t ie = -1);
62
65
  void divideRow(const Vector& denoms, int64_t ib = 0, int64_t ie = -1);
@@ -71,5 +74,10 @@ class DenseMatrix : public Matrix {
71
74
  void save(std::ostream&) const override;
72
75
  void load(std::istream&) override;
73
76
  void dump(std::ostream&) const override;
77
+
78
// Exception type thrown when a NaN shows up in a matrix computation.
// It is a std::runtime_error whose what() text is always "Encountered NaN.".
class EncounteredNaNError : public std::runtime_error {
 public:
  EncounteredNaNError()
      : std::runtime_error("Encountered NaN.") {}
};
74
82
  };
75
83
  } // namespace fasttext
@@ -47,7 +47,8 @@ std::shared_ptr<Loss> FastText::createLoss(std::shared_ptr<Matrix>& output) {
47
47
  }
48
48
  }
49
49
 
50
- FastText::FastText() : quant_(false), wordVectors_(nullptr) {}
50
+ FastText::FastText()
51
+ : quant_(false), wordVectors_(nullptr), trainException_(nullptr) {}
51
52
 
52
53
  void FastText::addInputVector(Vector& vec, int32_t ind) const {
53
54
  vec.addRow(*input_, ind);
@@ -69,6 +70,19 @@ std::shared_ptr<const DenseMatrix> FastText::getInputMatrix() const {
69
70
  return std::dynamic_pointer_cast<DenseMatrix>(input_);
70
71
  }
71
72
 
73
+ void FastText::setMatrices(
74
+ const std::shared_ptr<DenseMatrix>& inputMatrix,
75
+ const std::shared_ptr<DenseMatrix>& outputMatrix) {
76
+ assert(input_->size(1) == output_->size(1));
77
+
78
+ input_ = std::dynamic_pointer_cast<Matrix>(inputMatrix);
79
+ output_ = std::dynamic_pointer_cast<Matrix>(outputMatrix);
80
+ wordVectors_.reset();
81
+ args_->dim = input_->size(1);
82
+
83
+ buildModel();
84
+ }
85
+
72
86
  std::shared_ptr<const DenseMatrix> FastText::getOutputMatrix() const {
73
87
  if (quant_ && args_->qout) {
74
88
  throw std::runtime_error("Can't export quantized matrix");
@@ -86,6 +100,14 @@ int32_t FastText::getSubwordId(const std::string& subword) const {
86
100
  return dict_->nwords() + h;
87
101
  }
88
102
 
103
+ int32_t FastText::getLabelId(const std::string& label) const {
104
+ int32_t labelId = dict_->getId(label);
105
+ if (labelId != -1) {
106
+ labelId -= dict_->nwords();
107
+ }
108
+ return labelId;
109
+ }
110
+
89
111
  void FastText::getWordVector(Vector& vec, const std::string& word) const {
90
112
  const std::vector<int32_t>& ngrams = dict_->getSubwords(word);
91
113
  vec.zero();
@@ -97,10 +119,6 @@ void FastText::getWordVector(Vector& vec, const std::string& word) const {
97
119
  }
98
120
  }
99
121
 
100
- void FastText::getVector(Vector& vec, const std::string& word) const {
101
- getWordVector(vec, word);
102
- }
103
-
104
122
  void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
105
123
  vec.zero();
106
124
  int32_t h = dict_->hash(subword) % args_->bucket;
@@ -109,6 +127,9 @@ void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
109
127
  }
110
128
 
111
129
  void FastText::saveVectors(const std::string& filename) {
130
+ if (!input_ || !output_) {
131
+ throw std::runtime_error("Model never trained");
132
+ }
112
133
  std::ofstream ofs(filename);
113
134
  if (!ofs.is_open()) {
114
135
  throw std::invalid_argument(
@@ -124,10 +145,6 @@ void FastText::saveVectors(const std::string& filename) {
124
145
  ofs.close();
125
146
  }
126
147
 
127
- void FastText::saveVectors() {
128
- saveVectors(args_->output + ".vec");
129
- }
130
-
131
148
  void FastText::saveOutput(const std::string& filename) {
132
149
  std::ofstream ofs(filename);
133
150
  if (!ofs.is_open()) {
@@ -152,10 +169,6 @@ void FastText::saveOutput(const std::string& filename) {
152
169
  ofs.close();
153
170
  }
154
171
 
155
- void FastText::saveOutput() {
156
- saveOutput(args_->output + ".output");
157
- }
158
-
159
172
  bool FastText::checkModel(std::istream& in) {
160
173
  int32_t magic;
161
174
  in.read((char*)&(magic), sizeof(int32_t));
@@ -176,21 +189,14 @@ void FastText::signModel(std::ostream& out) {
176
189
  out.write((char*)&(version), sizeof(int32_t));
177
190
  }
178
191
 
179
- void FastText::saveModel() {
180
- std::string fn(args_->output);
181
- if (quant_) {
182
- fn += ".ftz";
183
- } else {
184
- fn += ".bin";
185
- }
186
- saveModel(fn);
187
- }
188
-
189
192
  void FastText::saveModel(const std::string& filename) {
190
193
  std::ofstream ofs(filename, std::ofstream::binary);
191
194
  if (!ofs.is_open()) {
192
195
  throw std::invalid_argument(filename + " cannot be opened for saving!");
193
196
  }
197
+ if (!input_ || !output_) {
198
+ throw std::runtime_error("Model never trained");
199
+ }
194
200
  signModel(ofs);
195
201
  args_->save(ofs);
196
202
  dict_->save(ofs);
@@ -224,6 +230,12 @@ std::vector<int64_t> FastText::getTargetCounts() const {
224
230
  }
225
231
  }
226
232
 
233
+ void FastText::buildModel() {
234
+ auto loss = createLoss(output_);
235
+ bool normalizeGradient = (args_->model == model_name::sup);
236
+ model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
237
+ }
238
+
227
239
  void FastText::loadModel(std::istream& in) {
228
240
  args_ = std::make_shared<Args>();
229
241
  input_ = std::make_shared<DenseMatrix>();
@@ -256,37 +268,37 @@ void FastText::loadModel(std::istream& in) {
256
268
  }
257
269
  output_->load(in);
258
270
 
259
- auto loss = createLoss(output_);
260
- bool normalizeGradient = (args_->model == model_name::sup);
261
- model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
271
+ buildModel();
262
272
  }
263
273
 
264
- void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
265
- std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
266
- double t =
267
- std::chrono::duration_cast<std::chrono::duration<double>>(end - start_)
268
- .count();
274
+ std::tuple<int64_t, double, double> FastText::progressInfo(real progress) {
275
+ double t = utils::getDuration(start_, std::chrono::steady_clock::now());
269
276
  double lr = args_->lr * (1.0 - progress);
270
277
  double wst = 0;
271
278
 
272
279
  int64_t eta = 2592000; // Default to one month in seconds (720 * 3600)
273
280
 
274
281
  if (progress > 0 && t >= 0) {
275
- progress = progress * 100;
276
- eta = t * (100 - progress) / progress;
282
+ eta = t * (1 - progress) / progress;
277
283
  wst = double(tokenCount_) / t / args_->thread;
278
284
  }
279
- int32_t etah = eta / 3600;
280
- int32_t etam = (eta % 3600) / 60;
285
+
286
+ return std::tuple<double, double, int64_t>(wst, lr, eta);
287
+ }
288
+
289
+ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
290
+ double wst;
291
+ double lr;
292
+ int64_t eta;
293
+ std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
281
294
 
282
295
  log_stream << std::fixed;
283
296
  log_stream << "Progress: ";
284
- log_stream << std::setprecision(1) << std::setw(5) << progress << "%";
297
+ log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
285
298
  log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
286
299
  log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
287
- log_stream << " loss: " << std::setw(9) << std::setprecision(6) << loss;
288
- log_stream << " ETA: " << std::setw(3) << etah;
289
- log_stream << "h" << std::setw(2) << etam << "m";
300
+ log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
301
+ log_stream << " ETA: " << utils::ClockPrint(eta);
290
302
  log_stream << std::flush;
291
303
  }
292
304
 
@@ -299,13 +311,16 @@ std::vector<int32_t> FastText::selectEmbeddings(int32_t cutoff) const {
299
311
  std::iota(idx.begin(), idx.end(), 0);
300
312
  auto eosid = dict_->getId(Dictionary::EOS);
301
313
  std::sort(idx.begin(), idx.end(), [&norms, eosid](size_t i1, size_t i2) {
314
+ if (i1 == eosid && i2 == eosid) { // satisfy strict weak ordering
315
+ return false;
316
+ }
302
317
  return eosid == i1 || (eosid != i2 && norms[i1] > norms[i2]);
303
318
  });
304
319
  idx.erase(idx.begin() + cutoff, idx.end());
305
320
  return idx;
306
321
  }
307
322
 
308
- void FastText::quantize(const Args& qargs) {
323
+ void FastText::quantize(const Args& qargs, const TrainCallback& callback) {
309
324
  if (args_->model != model_name::sup) {
310
325
  throw std::invalid_argument(
311
326
  "For now we only support quantization of supervised models");
@@ -337,10 +352,9 @@ void FastText::quantize(const Args& qargs) {
337
352
  args_->verbose = qargs.verbose;
338
353
  auto loss = createLoss(output_);
339
354
  model_ = std::make_shared<Model>(input, output, loss, normalizeGradient);
340
- startThreads();
355
+ startThreads(callback);
341
356
  }
342
357
  }
343
-
344
358
  input_ = std::make_shared<QuantMatrix>(
345
359
  std::move(*(input.get())), qargs.dsub, qargs.qnorm);
346
360
 
@@ -348,7 +362,6 @@ void FastText::quantize(const Args& qargs) {
348
362
  output_ = std::make_shared<QuantMatrix>(
349
363
  std::move(*(output.get())), 2, qargs.qnorm);
350
364
  }
351
-
352
365
  quant_ = true;
353
366
  auto loss = createLoss(output_);
354
367
  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
@@ -408,7 +421,7 @@ void FastText::skipgram(
408
421
 
409
422
  std::tuple<int64_t, double, double>
410
423
  FastText::test(std::istream& in, int32_t k, real threshold) {
411
- Meter meter;
424
+ Meter meter(false);
412
425
  test(in, k, threshold, meter);
413
426
 
414
427
  return std::tuple<int64_t, double, double>(
@@ -420,6 +433,9 @@ void FastText::test(std::istream& in, int32_t k, real threshold, Meter& meter)
420
433
  std::vector<int32_t> line;
421
434
  std::vector<int32_t> labels;
422
435
  Predictions predictions;
436
+ Model::State state(args_->dim, dict_->nlabels(), 0);
437
+ in.clear();
438
+ in.seekg(0, std::ios_base::beg);
423
439
 
424
440
  while (in.peek() != EOF) {
425
441
  line.clear();
@@ -521,16 +537,6 @@ std::vector<std::pair<std::string, Vector>> FastText::getNgramVectors(
521
537
  return result;
522
538
  }
523
539
 
524
- // deprecated. use getNgramVectors instead
525
- void FastText::ngramVectors(std::string word) {
526
- std::vector<std::pair<std::string, Vector>> ngramVectors =
527
- getNgramVectors(word);
528
-
529
- for (const auto& ngramVector : ngramVectors) {
530
- std::cout << ngramVector.first << " " << ngramVector.second << std::endl;
531
- }
532
- }
533
-
534
540
  void FastText::precomputeWordVectors(DenseMatrix& wordVectors) {
535
541
  Vector vec(args_->dim);
536
542
  wordVectors.zero();
@@ -598,17 +604,6 @@ std::vector<std::pair<real, std::string>> FastText::getNN(
598
604
  return heap;
599
605
  }
600
606
 
601
- // depracted. use getNN instead
602
- void FastText::findNN(
603
- const DenseMatrix& wordVectors,
604
- const Vector& query,
605
- int32_t k,
606
- const std::set<std::string>& banSet,
607
- std::vector<std::pair<real, std::string>>& results) {
608
- results.clear();
609
- results = getNN(wordVectors, query, k, banSet);
610
- }
611
-
612
607
  std::vector<std::pair<real, std::string>> FastText::getAnalogies(
613
608
  int32_t k,
614
609
  const std::string& wordA,
@@ -630,52 +625,52 @@ std::vector<std::pair<real, std::string>> FastText::getAnalogies(
630
625
  return getNN(*wordVectors_, query, k, {wordA, wordB, wordC});
631
626
  }
632
627
 
633
- // depreacted, use getAnalogies instead
634
- void FastText::analogies(int32_t k) {
635
- std::string prompt("Query triplet (A - B + C)? ");
636
- std::string wordA, wordB, wordC;
637
- std::cout << prompt;
638
- while (true) {
639
- std::cin >> wordA;
640
- std::cin >> wordB;
641
- std::cin >> wordC;
642
- auto results = getAnalogies(k, wordA, wordB, wordC);
643
-
644
- for (auto& pair : results) {
645
- std::cout << pair.second << " " << pair.first << std::endl;
646
- }
647
- std::cout << prompt;
648
- }
628
+ bool FastText::keepTraining(const int64_t ntokens) const {
629
+ return tokenCount_ < args_->epoch * ntokens && !trainException_;
649
630
  }
650
631
 
651
- void FastText::trainThread(int32_t threadId) {
632
+ void FastText::trainThread(int32_t threadId, const TrainCallback& callback) {
652
633
  std::ifstream ifs(args_->input);
653
634
  utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
654
635
 
655
- Model::State state(args_->dim, output_->size(0), threadId);
636
+ Model::State state(args_->dim, output_->size(0), threadId + args_->seed);
656
637
 
657
638
  const int64_t ntokens = dict_->ntokens();
658
639
  int64_t localTokenCount = 0;
659
640
  std::vector<int32_t> line, labels;
660
- while (tokenCount_ < args_->epoch * ntokens) {
661
- real progress = real(tokenCount_) / (args_->epoch * ntokens);
662
- real lr = args_->lr * (1.0 - progress);
663
- if (args_->model == model_name::sup) {
664
- localTokenCount += dict_->getLine(ifs, line, labels);
665
- supervised(state, lr, line, labels);
666
- } else if (args_->model == model_name::cbow) {
667
- localTokenCount += dict_->getLine(ifs, line, state.rng);
668
- cbow(state, lr, line);
669
- } else if (args_->model == model_name::sg) {
670
- localTokenCount += dict_->getLine(ifs, line, state.rng);
671
- skipgram(state, lr, line);
672
- }
673
- if (localTokenCount > args_->lrUpdateRate) {
674
- tokenCount_ += localTokenCount;
675
- localTokenCount = 0;
676
- if (threadId == 0 && args_->verbose > 1)
677
- loss_ = state.getLoss();
641
+ uint64_t callbackCounter = 0;
642
+ try {
643
+ while (keepTraining(ntokens)) {
644
+ real progress = real(tokenCount_) / (args_->epoch * ntokens);
645
+ if (callback && ((callbackCounter++ % 64) == 0)) {
646
+ double wst;
647
+ double lr;
648
+ int64_t eta;
649
+ std::tie<double, double, int64_t>(wst, lr, eta) =
650
+ progressInfo(progress);
651
+ callback(progress, loss_, wst, lr, eta);
652
+ }
653
+ real lr = args_->lr * (1.0 - progress);
654
+ if (args_->model == model_name::sup) {
655
+ localTokenCount += dict_->getLine(ifs, line, labels);
656
+ supervised(state, lr, line, labels);
657
+ } else if (args_->model == model_name::cbow) {
658
+ localTokenCount += dict_->getLine(ifs, line, state.rng);
659
+ cbow(state, lr, line);
660
+ } else if (args_->model == model_name::sg) {
661
+ localTokenCount += dict_->getLine(ifs, line, state.rng);
662
+ skipgram(state, lr, line);
663
+ }
664
+ if (localTokenCount > args_->lrUpdateRate) {
665
+ tokenCount_ += localTokenCount;
666
+ localTokenCount = 0;
667
+ if (threadId == 0 && args_->verbose > 1) {
668
+ loss_ = state.getLoss();
669
+ }
670
+ }
678
671
  }
672
+ } catch (DenseMatrix::EncounteredNaNError&) {
673
+ trainException_ = std::current_exception();
679
674
  }
680
675
  if (threadId == 0)
681
676
  loss_ = state.getLoss();
@@ -713,7 +708,7 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
713
708
  dict_->init();
714
709
  std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
715
710
  dict_->nwords() + args_->bucket, args_->dim);
716
- input->uniform(1.0 / args_->dim);
711
+ input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
717
712
 
718
713
  for (size_t i = 0; i < n; i++) {
719
714
  int32_t idx = dict_->getId(words[i]);
@@ -727,14 +722,10 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
727
722
  return input;
728
723
  }
729
724
 
730
- void FastText::loadVectors(const std::string& filename) {
731
- input_ = getInputMatrixFromFile(filename);
732
- }
733
-
734
725
  std::shared_ptr<Matrix> FastText::createRandomMatrix() const {
735
726
  std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
736
727
  dict_->nwords() + args_->bucket, args_->dim);
737
- input->uniform(1.0 / args_->dim);
728
+ input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
738
729
 
739
730
  return input;
740
731
  }
@@ -749,7 +740,7 @@ std::shared_ptr<Matrix> FastText::createTrainOutputMatrix() const {
749
740
  return output;
750
741
  }
751
742
 
752
- void FastText::train(const Args& args) {
743
+ void FastText::train(const Args& args, const TrainCallback& callback) {
753
744
  args_ = std::make_shared<Args>(args);
754
745
  dict_ = std::make_shared<Dictionary>(args_);
755
746
  if (args_->input == "-") {
@@ -770,23 +761,38 @@ void FastText::train(const Args& args) {
770
761
  input_ = createRandomMatrix();
771
762
  }
772
763
  output_ = createTrainOutputMatrix();
764
+ quant_ = false;
773
765
  auto loss = createLoss(output_);
774
766
  bool normalizeGradient = (args_->model == model_name::sup);
775
767
  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
776
- startThreads();
768
+ startThreads(callback);
769
+ }
770
+
771
+ void FastText::abort() {
772
+ try {
773
+ throw AbortError();
774
+ } catch (AbortError&) {
775
+ trainException_ = std::current_exception();
776
+ }
777
777
  }
778
778
 
779
- void FastText::startThreads() {
779
+ void FastText::startThreads(const TrainCallback& callback) {
780
780
  start_ = std::chrono::steady_clock::now();
781
781
  tokenCount_ = 0;
782
782
  loss_ = -1;
783
+ trainException_ = nullptr;
783
784
  std::vector<std::thread> threads;
784
- for (int32_t i = 0; i < args_->thread; i++) {
785
- threads.push_back(std::thread([=]() { trainThread(i); }));
785
+ if (args_->thread > 1) {
786
+ for (int32_t i = 0; i < args_->thread; i++) {
787
+ threads.push_back(std::thread([=]() { trainThread(i, callback); }));
788
+ }
789
+ } else {
790
+ // webassembly can't instantiate `std::thread`
791
+ trainThread(0, callback);
786
792
  }
787
793
  const int64_t ntokens = dict_->ntokens();
788
794
  // Same condition as trainThread
789
- while (tokenCount_ < args_->epoch * ntokens) {
795
+ while (keepTraining(ntokens)) {
790
796
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
791
797
  if (loss_ >= 0 && args_->verbose > 1) {
792
798
  real progress = real(tokenCount_) / (args_->epoch * ntokens);
@@ -794,9 +800,14 @@ void FastText::startThreads() {
794
800
  printInfo(progress, loss_, std::cerr);
795
801
  }
796
802
  }
797
- for (int32_t i = 0; i < args_->thread; i++) {
803
+ for (int32_t i = 0; i < threads.size(); i++) {
798
804
  threads[i].join();
799
805
  }
806
+ if (trainException_) {
807
+ std::exception_ptr exception = trainException_;
808
+ trainException_ = nullptr;
809
+ std::rethrow_exception(exception);
810
+ }
800
811
  if (args_->verbose > 0) {
801
812
  std::cerr << "\r";
802
813
  printInfo(1.0, loss_, std::cerr);