fasttext 0.1.0 → 0.2.1

This diff shows the differences between the contents of publicly available package versions that have been released to one of the supported registries. The information in this diff is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (498)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -0,0 +1,89 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #pragma once
10
+
11
+ #include <istream>
12
+ #include <memory>
13
+ #include <random>
14
+ #include <thread>
15
+ #include <vector>
16
+
17
+ #include "args.h"
18
+ #include "fasttext.h"
19
+
20
+ namespace fasttext {
21
+
22
+ class AutotuneStrategy {
23
+ private:
24
+ Args bestArgs_;
25
+ int maxDuration_;
26
+ std::minstd_rand rng_;
27
+ int trials_;
28
+ int bestMinnIndex_;
29
+ int bestDsubExponent_;
30
+ int bestNonzeroBucket_;
31
+ int originalBucket_;
32
+ std::vector<int> minnChoices_;
33
+ int getIndex(int val, const std::vector<int>& choices);
34
+
35
+ public:
36
+ explicit AutotuneStrategy(
37
+ const Args& args,
38
+ std::minstd_rand::result_type seed);
39
+ Args ask(double elapsed);
40
+ void updateBest(const Args& args);
41
+ };
42
+
43
+ class Autotune {
44
+ protected:
45
+ std::shared_ptr<FastText> fastText_;
46
+ double elapsed_;
47
+ double bestScore_;
48
+ int32_t trials_;
49
+ int32_t sizeConstraintFailed_;
50
+ std::atomic<bool> continueTraining_;
51
+ std::unique_ptr<AutotuneStrategy> strategy_;
52
+ std::thread timer_;
53
+
54
+ bool keepTraining(double maxDuration) const;
55
+ void printInfo(double maxDuration);
56
+ void timer(
57
+ const std::chrono::steady_clock::time_point& start,
58
+ double maxDuration);
59
+ void abort();
60
+ void startTimer(const Args& args);
61
+ double getMetricScore(
62
+ Meter& meter,
63
+ const metric_name& metricName,
64
+ const double metricValue,
65
+ const std::string& metricLabel) const;
66
+ void printArgs(const Args& args, const Args& autotuneArgs);
67
+ void printSkippedArgs(const Args& autotuneArgs);
68
+ bool quantize(Args& args, const Args& autotuneArgs);
69
+ int getCutoffForFileSize(bool qout, bool qnorm, int dsub, int64_t fileSize)
70
+ const;
71
+
72
+ class TimeoutError : public std::runtime_error {
73
+ public:
74
+ TimeoutError() : std::runtime_error("Autotune timed out.") {}
75
+ };
76
+
77
+ public:
78
+ Autotune() = delete;
79
+ explicit Autotune(const std::shared_ptr<FastText>& fastText);
80
+ Autotune(const Autotune&) = delete;
81
+ Autotune(Autotune&&) = delete;
82
+ Autotune& operator=(const Autotune&) = delete;
83
+ Autotune& operator=(Autotune&&) = delete;
84
+ ~Autotune() noexcept = default;
85
+
86
+ void train(const Args& args);
87
+ };
88
+
89
+ } // namespace fasttext
@@ -8,11 +8,10 @@
8
8
 
9
9
  #include "densematrix.h"
10
10
 
11
- #include <exception>
12
11
  #include <random>
13
12
  #include <stdexcept>
13
+ #include <thread>
14
14
  #include <utility>
15
-
16
15
  #include "utils.h"
17
16
  #include "vector.h"
18
17
 
@@ -25,18 +24,39 @@ DenseMatrix::DenseMatrix(int64_t m, int64_t n) : Matrix(m, n), data_(m * n) {}
25
24
  DenseMatrix::DenseMatrix(DenseMatrix&& other) noexcept
26
25
  : Matrix(other.m_, other.n_), data_(std::move(other.data_)) {}
27
26
 
27
+ DenseMatrix::DenseMatrix(int64_t m, int64_t n, real* dataPtr)
28
+ : Matrix(m, n), data_(dataPtr, dataPtr + (m * n)) {}
29
+
28
30
  void DenseMatrix::zero() {
29
31
  std::fill(data_.begin(), data_.end(), 0.0);
30
32
  }
31
33
 
32
- void DenseMatrix::uniform(real a) {
33
- std::minstd_rand rng(1);
34
+ void DenseMatrix::uniformThread(real a, int block, int32_t seed) {
35
+ std::minstd_rand rng(block + seed);
34
36
  std::uniform_real_distribution<> uniform(-a, a);
35
- for (int64_t i = 0; i < (m_ * n_); i++) {
37
+ int64_t blockSize = (m_ * n_) / 10;
38
+ for (int64_t i = blockSize * block;
39
+ i < (m_ * n_) && i < blockSize * (block + 1);
40
+ i++) {
36
41
  data_[i] = uniform(rng);
37
42
  }
38
43
  }
39
44
 
45
+ void DenseMatrix::uniform(real a, unsigned int thread, int32_t seed) {
46
+ if (thread > 1) {
47
+ std::vector<std::thread> threads;
48
+ for (int i = 0; i < thread; i++) {
49
+ threads.push_back(std::thread([=]() { uniformThread(a, i, seed); }));
50
+ }
51
+ for (int32_t i = 0; i < threads.size(); i++) {
52
+ threads[i].join();
53
+ }
54
+ } else {
55
+ // webassembly can't instantiate `std::thread`
56
+ uniformThread(a, 0, seed);
57
+ }
58
+ }
59
+
40
60
  void DenseMatrix::multiplyRow(const Vector& nums, int64_t ib, int64_t ie) {
41
61
  if (ie == -1) {
42
62
  ie = m_;
@@ -73,7 +93,7 @@ real DenseMatrix::l2NormRow(int64_t i) const {
73
93
  norm += at(i, j) * at(i, j);
74
94
  }
75
95
  if (std::isnan(norm)) {
76
- throw std::runtime_error("Encountered NaN.");
96
+ throw EncounteredNaNError();
77
97
  }
78
98
  return std::sqrt(norm);
79
99
  }
@@ -94,7 +114,7 @@ real DenseMatrix::dotRow(const Vector& vec, int64_t i) const {
94
114
  d += at(i, j) * vec[j];
95
115
  }
96
116
  if (std::isnan(d)) {
97
- throw std::runtime_error("Encountered NaN.");
117
+ throw EncounteredNaNError();
98
118
  }
99
119
  return d;
100
120
  }
@@ -8,12 +8,13 @@
8
8
 
9
9
  #pragma once
10
10
 
11
+ #include <assert.h>
11
12
  #include <cstdint>
12
13
  #include <istream>
13
14
  #include <ostream>
15
+ #include <stdexcept>
14
16
  #include <vector>
15
17
 
16
- #include <assert.h>
17
18
  #include "matrix.h"
18
19
  #include "real.h"
19
20
 
@@ -24,10 +25,12 @@ class Vector;
24
25
  class DenseMatrix : public Matrix {
25
26
  protected:
26
27
  std::vector<real> data_;
28
+ void uniformThread(real, int, int32_t);
27
29
 
28
30
  public:
29
31
  DenseMatrix();
30
32
  explicit DenseMatrix(int64_t, int64_t);
33
+ explicit DenseMatrix(int64_t m, int64_t n, real* dataPtr);
31
34
  DenseMatrix(const DenseMatrix&) = default;
32
35
  DenseMatrix(DenseMatrix&&) noexcept;
33
36
  DenseMatrix& operator=(const DenseMatrix&) = delete;
@@ -56,7 +59,7 @@ class DenseMatrix : public Matrix {
56
59
  return n_;
57
60
  }
58
61
  void zero();
59
- void uniform(real);
62
+ void uniform(real, unsigned int, int32_t);
60
63
 
61
64
  void multiplyRow(const Vector& nums, int64_t ib = 0, int64_t ie = -1);
62
65
  void divideRow(const Vector& denoms, int64_t ib = 0, int64_t ie = -1);
@@ -71,5 +74,10 @@ class DenseMatrix : public Matrix {
71
74
  void save(std::ostream&) const override;
72
75
  void load(std::istream&) override;
73
76
  void dump(std::ostream&) const override;
77
+
78
+ class EncounteredNaNError : public std::runtime_error {
79
+ public:
80
+ EncounteredNaNError() : std::runtime_error("Encountered NaN.") {}
81
+ };
74
82
  };
75
83
  } // namespace fasttext
@@ -47,7 +47,8 @@ std::shared_ptr<Loss> FastText::createLoss(std::shared_ptr<Matrix>& output) {
47
47
  }
48
48
  }
49
49
 
50
- FastText::FastText() : quant_(false), wordVectors_(nullptr) {}
50
+ FastText::FastText()
51
+ : quant_(false), wordVectors_(nullptr), trainException_(nullptr) {}
51
52
 
52
53
  void FastText::addInputVector(Vector& vec, int32_t ind) const {
53
54
  vec.addRow(*input_, ind);
@@ -69,6 +70,19 @@ std::shared_ptr<const DenseMatrix> FastText::getInputMatrix() const {
69
70
  return std::dynamic_pointer_cast<DenseMatrix>(input_);
70
71
  }
71
72
 
73
+ void FastText::setMatrices(
74
+ const std::shared_ptr<DenseMatrix>& inputMatrix,
75
+ const std::shared_ptr<DenseMatrix>& outputMatrix) {
76
+ assert(input_->size(1) == output_->size(1));
77
+
78
+ input_ = std::dynamic_pointer_cast<Matrix>(inputMatrix);
79
+ output_ = std::dynamic_pointer_cast<Matrix>(outputMatrix);
80
+ wordVectors_.reset();
81
+ args_->dim = input_->size(1);
82
+
83
+ buildModel();
84
+ }
85
+
72
86
  std::shared_ptr<const DenseMatrix> FastText::getOutputMatrix() const {
73
87
  if (quant_ && args_->qout) {
74
88
  throw std::runtime_error("Can't export quantized matrix");
@@ -86,6 +100,14 @@ int32_t FastText::getSubwordId(const std::string& subword) const {
86
100
  return dict_->nwords() + h;
87
101
  }
88
102
 
103
+ int32_t FastText::getLabelId(const std::string& label) const {
104
+ int32_t labelId = dict_->getId(label);
105
+ if (labelId != -1) {
106
+ labelId -= dict_->nwords();
107
+ }
108
+ return labelId;
109
+ }
110
+
89
111
  void FastText::getWordVector(Vector& vec, const std::string& word) const {
90
112
  const std::vector<int32_t>& ngrams = dict_->getSubwords(word);
91
113
  vec.zero();
@@ -97,10 +119,6 @@ void FastText::getWordVector(Vector& vec, const std::string& word) const {
97
119
  }
98
120
  }
99
121
 
100
- void FastText::getVector(Vector& vec, const std::string& word) const {
101
- getWordVector(vec, word);
102
- }
103
-
104
122
  void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
105
123
  vec.zero();
106
124
  int32_t h = dict_->hash(subword) % args_->bucket;
@@ -109,6 +127,9 @@ void FastText::getSubwordVector(Vector& vec, const std::string& subword) const {
109
127
  }
110
128
 
111
129
  void FastText::saveVectors(const std::string& filename) {
130
+ if (!input_ || !output_) {
131
+ throw std::runtime_error("Model never trained");
132
+ }
112
133
  std::ofstream ofs(filename);
113
134
  if (!ofs.is_open()) {
114
135
  throw std::invalid_argument(
@@ -124,10 +145,6 @@ void FastText::saveVectors(const std::string& filename) {
124
145
  ofs.close();
125
146
  }
126
147
 
127
- void FastText::saveVectors() {
128
- saveVectors(args_->output + ".vec");
129
- }
130
-
131
148
  void FastText::saveOutput(const std::string& filename) {
132
149
  std::ofstream ofs(filename);
133
150
  if (!ofs.is_open()) {
@@ -152,10 +169,6 @@ void FastText::saveOutput(const std::string& filename) {
152
169
  ofs.close();
153
170
  }
154
171
 
155
- void FastText::saveOutput() {
156
- saveOutput(args_->output + ".output");
157
- }
158
-
159
172
  bool FastText::checkModel(std::istream& in) {
160
173
  int32_t magic;
161
174
  in.read((char*)&(magic), sizeof(int32_t));
@@ -176,21 +189,14 @@ void FastText::signModel(std::ostream& out) {
176
189
  out.write((char*)&(version), sizeof(int32_t));
177
190
  }
178
191
 
179
- void FastText::saveModel() {
180
- std::string fn(args_->output);
181
- if (quant_) {
182
- fn += ".ftz";
183
- } else {
184
- fn += ".bin";
185
- }
186
- saveModel(fn);
187
- }
188
-
189
192
  void FastText::saveModel(const std::string& filename) {
190
193
  std::ofstream ofs(filename, std::ofstream::binary);
191
194
  if (!ofs.is_open()) {
192
195
  throw std::invalid_argument(filename + " cannot be opened for saving!");
193
196
  }
197
+ if (!input_ || !output_) {
198
+ throw std::runtime_error("Model never trained");
199
+ }
194
200
  signModel(ofs);
195
201
  args_->save(ofs);
196
202
  dict_->save(ofs);
@@ -224,6 +230,12 @@ std::vector<int64_t> FastText::getTargetCounts() const {
224
230
  }
225
231
  }
226
232
 
233
+ void FastText::buildModel() {
234
+ auto loss = createLoss(output_);
235
+ bool normalizeGradient = (args_->model == model_name::sup);
236
+ model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
237
+ }
238
+
227
239
  void FastText::loadModel(std::istream& in) {
228
240
  args_ = std::make_shared<Args>();
229
241
  input_ = std::make_shared<DenseMatrix>();
@@ -256,37 +268,37 @@ void FastText::loadModel(std::istream& in) {
256
268
  }
257
269
  output_->load(in);
258
270
 
259
- auto loss = createLoss(output_);
260
- bool normalizeGradient = (args_->model == model_name::sup);
261
- model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
271
+ buildModel();
262
272
  }
263
273
 
264
- void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
265
- std::chrono::steady_clock::time_point end = std::chrono::steady_clock::now();
266
- double t =
267
- std::chrono::duration_cast<std::chrono::duration<double>>(end - start_)
268
- .count();
274
+ std::tuple<int64_t, double, double> FastText::progressInfo(real progress) {
275
+ double t = utils::getDuration(start_, std::chrono::steady_clock::now());
269
276
  double lr = args_->lr * (1.0 - progress);
270
277
  double wst = 0;
271
278
 
272
279
  int64_t eta = 2592000; // Default to one month in seconds (720 * 3600)
273
280
 
274
281
  if (progress > 0 && t >= 0) {
275
- progress = progress * 100;
276
- eta = t * (100 - progress) / progress;
282
+ eta = t * (1 - progress) / progress;
277
283
  wst = double(tokenCount_) / t / args_->thread;
278
284
  }
279
- int32_t etah = eta / 3600;
280
- int32_t etam = (eta % 3600) / 60;
285
+
286
+ return std::tuple<double, double, int64_t>(wst, lr, eta);
287
+ }
288
+
289
+ void FastText::printInfo(real progress, real loss, std::ostream& log_stream) {
290
+ double wst;
291
+ double lr;
292
+ int64_t eta;
293
+ std::tie<double, double, int64_t>(wst, lr, eta) = progressInfo(progress);
281
294
 
282
295
  log_stream << std::fixed;
283
296
  log_stream << "Progress: ";
284
- log_stream << std::setprecision(1) << std::setw(5) << progress << "%";
297
+ log_stream << std::setprecision(1) << std::setw(5) << (progress * 100) << "%";
285
298
  log_stream << " words/sec/thread: " << std::setw(7) << int64_t(wst);
286
299
  log_stream << " lr: " << std::setw(9) << std::setprecision(6) << lr;
287
- log_stream << " loss: " << std::setw(9) << std::setprecision(6) << loss;
288
- log_stream << " ETA: " << std::setw(3) << etah;
289
- log_stream << "h" << std::setw(2) << etam << "m";
300
+ log_stream << " avg.loss: " << std::setw(9) << std::setprecision(6) << loss;
301
+ log_stream << " ETA: " << utils::ClockPrint(eta);
290
302
  log_stream << std::flush;
291
303
  }
292
304
 
@@ -299,13 +311,16 @@ std::vector<int32_t> FastText::selectEmbeddings(int32_t cutoff) const {
299
311
  std::iota(idx.begin(), idx.end(), 0);
300
312
  auto eosid = dict_->getId(Dictionary::EOS);
301
313
  std::sort(idx.begin(), idx.end(), [&norms, eosid](size_t i1, size_t i2) {
314
+ if (i1 == eosid && i2 == eosid) { // satisfy strict weak ordering
315
+ return false;
316
+ }
302
317
  return eosid == i1 || (eosid != i2 && norms[i1] > norms[i2]);
303
318
  });
304
319
  idx.erase(idx.begin() + cutoff, idx.end());
305
320
  return idx;
306
321
  }
307
322
 
308
- void FastText::quantize(const Args& qargs) {
323
+ void FastText::quantize(const Args& qargs, const TrainCallback& callback) {
309
324
  if (args_->model != model_name::sup) {
310
325
  throw std::invalid_argument(
311
326
  "For now we only support quantization of supervised models");
@@ -337,10 +352,9 @@ void FastText::quantize(const Args& qargs) {
337
352
  args_->verbose = qargs.verbose;
338
353
  auto loss = createLoss(output_);
339
354
  model_ = std::make_shared<Model>(input, output, loss, normalizeGradient);
340
- startThreads();
355
+ startThreads(callback);
341
356
  }
342
357
  }
343
-
344
358
  input_ = std::make_shared<QuantMatrix>(
345
359
  std::move(*(input.get())), qargs.dsub, qargs.qnorm);
346
360
 
@@ -348,7 +362,6 @@ void FastText::quantize(const Args& qargs) {
348
362
  output_ = std::make_shared<QuantMatrix>(
349
363
  std::move(*(output.get())), 2, qargs.qnorm);
350
364
  }
351
-
352
365
  quant_ = true;
353
366
  auto loss = createLoss(output_);
354
367
  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
@@ -408,7 +421,7 @@ void FastText::skipgram(
408
421
 
409
422
  std::tuple<int64_t, double, double>
410
423
  FastText::test(std::istream& in, int32_t k, real threshold) {
411
- Meter meter;
424
+ Meter meter(false);
412
425
  test(in, k, threshold, meter);
413
426
 
414
427
  return std::tuple<int64_t, double, double>(
@@ -420,6 +433,9 @@ void FastText::test(std::istream& in, int32_t k, real threshold, Meter& meter)
420
433
  std::vector<int32_t> line;
421
434
  std::vector<int32_t> labels;
422
435
  Predictions predictions;
436
+ Model::State state(args_->dim, dict_->nlabels(), 0);
437
+ in.clear();
438
+ in.seekg(0, std::ios_base::beg);
423
439
 
424
440
  while (in.peek() != EOF) {
425
441
  line.clear();
@@ -521,16 +537,6 @@ std::vector<std::pair<std::string, Vector>> FastText::getNgramVectors(
521
537
  return result;
522
538
  }
523
539
 
524
- // deprecated. use getNgramVectors instead
525
- void FastText::ngramVectors(std::string word) {
526
- std::vector<std::pair<std::string, Vector>> ngramVectors =
527
- getNgramVectors(word);
528
-
529
- for (const auto& ngramVector : ngramVectors) {
530
- std::cout << ngramVector.first << " " << ngramVector.second << std::endl;
531
- }
532
- }
533
-
534
540
  void FastText::precomputeWordVectors(DenseMatrix& wordVectors) {
535
541
  Vector vec(args_->dim);
536
542
  wordVectors.zero();
@@ -598,17 +604,6 @@ std::vector<std::pair<real, std::string>> FastText::getNN(
598
604
  return heap;
599
605
  }
600
606
 
601
- // depracted. use getNN instead
602
- void FastText::findNN(
603
- const DenseMatrix& wordVectors,
604
- const Vector& query,
605
- int32_t k,
606
- const std::set<std::string>& banSet,
607
- std::vector<std::pair<real, std::string>>& results) {
608
- results.clear();
609
- results = getNN(wordVectors, query, k, banSet);
610
- }
611
-
612
607
  std::vector<std::pair<real, std::string>> FastText::getAnalogies(
613
608
  int32_t k,
614
609
  const std::string& wordA,
@@ -630,52 +625,52 @@ std::vector<std::pair<real, std::string>> FastText::getAnalogies(
630
625
  return getNN(*wordVectors_, query, k, {wordA, wordB, wordC});
631
626
  }
632
627
 
633
- // depreacted, use getAnalogies instead
634
- void FastText::analogies(int32_t k) {
635
- std::string prompt("Query triplet (A - B + C)? ");
636
- std::string wordA, wordB, wordC;
637
- std::cout << prompt;
638
- while (true) {
639
- std::cin >> wordA;
640
- std::cin >> wordB;
641
- std::cin >> wordC;
642
- auto results = getAnalogies(k, wordA, wordB, wordC);
643
-
644
- for (auto& pair : results) {
645
- std::cout << pair.second << " " << pair.first << std::endl;
646
- }
647
- std::cout << prompt;
648
- }
628
+ bool FastText::keepTraining(const int64_t ntokens) const {
629
+ return tokenCount_ < args_->epoch * ntokens && !trainException_;
649
630
  }
650
631
 
651
- void FastText::trainThread(int32_t threadId) {
632
+ void FastText::trainThread(int32_t threadId, const TrainCallback& callback) {
652
633
  std::ifstream ifs(args_->input);
653
634
  utils::seek(ifs, threadId * utils::size(ifs) / args_->thread);
654
635
 
655
- Model::State state(args_->dim, output_->size(0), threadId);
636
+ Model::State state(args_->dim, output_->size(0), threadId + args_->seed);
656
637
 
657
638
  const int64_t ntokens = dict_->ntokens();
658
639
  int64_t localTokenCount = 0;
659
640
  std::vector<int32_t> line, labels;
660
- while (tokenCount_ < args_->epoch * ntokens) {
661
- real progress = real(tokenCount_) / (args_->epoch * ntokens);
662
- real lr = args_->lr * (1.0 - progress);
663
- if (args_->model == model_name::sup) {
664
- localTokenCount += dict_->getLine(ifs, line, labels);
665
- supervised(state, lr, line, labels);
666
- } else if (args_->model == model_name::cbow) {
667
- localTokenCount += dict_->getLine(ifs, line, state.rng);
668
- cbow(state, lr, line);
669
- } else if (args_->model == model_name::sg) {
670
- localTokenCount += dict_->getLine(ifs, line, state.rng);
671
- skipgram(state, lr, line);
672
- }
673
- if (localTokenCount > args_->lrUpdateRate) {
674
- tokenCount_ += localTokenCount;
675
- localTokenCount = 0;
676
- if (threadId == 0 && args_->verbose > 1)
677
- loss_ = state.getLoss();
641
+ uint64_t callbackCounter = 0;
642
+ try {
643
+ while (keepTraining(ntokens)) {
644
+ real progress = real(tokenCount_) / (args_->epoch * ntokens);
645
+ if (callback && ((callbackCounter++ % 64) == 0)) {
646
+ double wst;
647
+ double lr;
648
+ int64_t eta;
649
+ std::tie<double, double, int64_t>(wst, lr, eta) =
650
+ progressInfo(progress);
651
+ callback(progress, loss_, wst, lr, eta);
652
+ }
653
+ real lr = args_->lr * (1.0 - progress);
654
+ if (args_->model == model_name::sup) {
655
+ localTokenCount += dict_->getLine(ifs, line, labels);
656
+ supervised(state, lr, line, labels);
657
+ } else if (args_->model == model_name::cbow) {
658
+ localTokenCount += dict_->getLine(ifs, line, state.rng);
659
+ cbow(state, lr, line);
660
+ } else if (args_->model == model_name::sg) {
661
+ localTokenCount += dict_->getLine(ifs, line, state.rng);
662
+ skipgram(state, lr, line);
663
+ }
664
+ if (localTokenCount > args_->lrUpdateRate) {
665
+ tokenCount_ += localTokenCount;
666
+ localTokenCount = 0;
667
+ if (threadId == 0 && args_->verbose > 1) {
668
+ loss_ = state.getLoss();
669
+ }
670
+ }
678
671
  }
672
+ } catch (DenseMatrix::EncounteredNaNError&) {
673
+ trainException_ = std::current_exception();
679
674
  }
680
675
  if (threadId == 0)
681
676
  loss_ = state.getLoss();
@@ -713,7 +708,7 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
713
708
  dict_->init();
714
709
  std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
715
710
  dict_->nwords() + args_->bucket, args_->dim);
716
- input->uniform(1.0 / args_->dim);
711
+ input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
717
712
 
718
713
  for (size_t i = 0; i < n; i++) {
719
714
  int32_t idx = dict_->getId(words[i]);
@@ -727,14 +722,10 @@ std::shared_ptr<Matrix> FastText::getInputMatrixFromFile(
727
722
  return input;
728
723
  }
729
724
 
730
- void FastText::loadVectors(const std::string& filename) {
731
- input_ = getInputMatrixFromFile(filename);
732
- }
733
-
734
725
  std::shared_ptr<Matrix> FastText::createRandomMatrix() const {
735
726
  std::shared_ptr<DenseMatrix> input = std::make_shared<DenseMatrix>(
736
727
  dict_->nwords() + args_->bucket, args_->dim);
737
- input->uniform(1.0 / args_->dim);
728
+ input->uniform(1.0 / args_->dim, args_->thread, args_->seed);
738
729
 
739
730
  return input;
740
731
  }
@@ -749,7 +740,7 @@ std::shared_ptr<Matrix> FastText::createTrainOutputMatrix() const {
749
740
  return output;
750
741
  }
751
742
 
752
- void FastText::train(const Args& args) {
743
+ void FastText::train(const Args& args, const TrainCallback& callback) {
753
744
  args_ = std::make_shared<Args>(args);
754
745
  dict_ = std::make_shared<Dictionary>(args_);
755
746
  if (args_->input == "-") {
@@ -770,23 +761,38 @@ void FastText::train(const Args& args) {
770
761
  input_ = createRandomMatrix();
771
762
  }
772
763
  output_ = createTrainOutputMatrix();
764
+ quant_ = false;
773
765
  auto loss = createLoss(output_);
774
766
  bool normalizeGradient = (args_->model == model_name::sup);
775
767
  model_ = std::make_shared<Model>(input_, output_, loss, normalizeGradient);
776
- startThreads();
768
+ startThreads(callback);
769
+ }
770
+
771
+ void FastText::abort() {
772
+ try {
773
+ throw AbortError();
774
+ } catch (AbortError&) {
775
+ trainException_ = std::current_exception();
776
+ }
777
777
  }
778
778
 
779
- void FastText::startThreads() {
779
+ void FastText::startThreads(const TrainCallback& callback) {
780
780
  start_ = std::chrono::steady_clock::now();
781
781
  tokenCount_ = 0;
782
782
  loss_ = -1;
783
+ trainException_ = nullptr;
783
784
  std::vector<std::thread> threads;
784
- for (int32_t i = 0; i < args_->thread; i++) {
785
- threads.push_back(std::thread([=]() { trainThread(i); }));
785
+ if (args_->thread > 1) {
786
+ for (int32_t i = 0; i < args_->thread; i++) {
787
+ threads.push_back(std::thread([=]() { trainThread(i, callback); }));
788
+ }
789
+ } else {
790
+ // webassembly can't instantiate `std::thread`
791
+ trainThread(0, callback);
786
792
  }
787
793
  const int64_t ntokens = dict_->ntokens();
788
794
  // Same condition as trainThread
789
- while (tokenCount_ < args_->epoch * ntokens) {
795
+ while (keepTraining(ntokens)) {
790
796
  std::this_thread::sleep_for(std::chrono::milliseconds(100));
791
797
  if (loss_ >= 0 && args_->verbose > 1) {
792
798
  real progress = real(tokenCount_) / (args_->epoch * ntokens);
@@ -794,9 +800,14 @@ void FastText::startThreads() {
794
800
  printInfo(progress, loss_, std::cerr);
795
801
  }
796
802
  }
797
- for (int32_t i = 0; i < args_->thread; i++) {
803
+ for (int32_t i = 0; i < threads.size(); i++) {
798
804
  threads[i].join();
799
805
  }
806
+ if (trainException_) {
807
+ std::exception_ptr exception = trainException_;
808
+ trainException_ = nullptr;
809
+ std::rethrow_exception(exception);
810
+ }
800
811
  if (args_->verbose > 0) {
801
812
  std::cerr << "\r";
802
813
  printInfo(1.0, loss_, std::cerr);