fasttext 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (510) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +3 -0
  3. data/LICENSE.txt +22 -0
  4. data/README.md +251 -0
  5. data/ext/fasttext/ext.cpp +291 -0
  6. data/ext/fasttext/extconf.rb +15 -0
  7. data/lib/fasttext.rb +41 -0
  8. data/lib/fasttext/classifier.rb +92 -0
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/lib/fasttext/model.rb +60 -0
  11. data/lib/fasttext/vectorizer.rb +58 -0
  12. data/lib/fasttext/version.rb +3 -0
  13. data/vendor/fastText/CMakeLists.txt +68 -0
  14. data/vendor/fastText/CODE_OF_CONDUCT.md +2 -0
  15. data/vendor/fastText/CONTRIBUTING.md +32 -0
  16. data/vendor/fastText/LICENSE +21 -0
  17. data/vendor/fastText/MANIFEST.in +5 -0
  18. data/vendor/fastText/Makefile +63 -0
  19. data/vendor/fastText/README.md +339 -0
  20. data/vendor/fastText/alignment/README.md +53 -0
  21. data/vendor/fastText/alignment/align.py +145 -0
  22. data/vendor/fastText/alignment/eval.py +60 -0
  23. data/vendor/fastText/alignment/example.sh +51 -0
  24. data/vendor/fastText/alignment/unsup_align.py +109 -0
  25. data/vendor/fastText/alignment/utils.py +154 -0
  26. data/vendor/fastText/classification-example.sh +41 -0
  27. data/vendor/fastText/classification-results.sh +94 -0
  28. data/vendor/fastText/crawl/README.md +26 -0
  29. data/vendor/fastText/crawl/dedup.cc +51 -0
  30. data/vendor/fastText/crawl/download_crawl.sh +57 -0
  31. data/vendor/fastText/crawl/filter_dedup.sh +13 -0
  32. data/vendor/fastText/crawl/filter_utf8.cc +105 -0
  33. data/vendor/fastText/crawl/process_wet_file.sh +30 -0
  34. data/vendor/fastText/docs/aligned-vectors.md +64 -0
  35. data/vendor/fastText/docs/api.md +6 -0
  36. data/vendor/fastText/docs/cheatsheet.md +66 -0
  37. data/vendor/fastText/docs/crawl-vectors.md +125 -0
  38. data/vendor/fastText/docs/dataset.md +6 -0
  39. data/vendor/fastText/docs/english-vectors.md +53 -0
  40. data/vendor/fastText/docs/faqs.md +63 -0
  41. data/vendor/fastText/docs/language-identification.md +47 -0
  42. data/vendor/fastText/docs/options.md +50 -0
  43. data/vendor/fastText/docs/pretrained-vectors.md +142 -0
  44. data/vendor/fastText/docs/python-module.md +314 -0
  45. data/vendor/fastText/docs/references.md +41 -0
  46. data/vendor/fastText/docs/supervised-models.md +54 -0
  47. data/vendor/fastText/docs/supervised-tutorial.md +349 -0
  48. data/vendor/fastText/docs/support.md +58 -0
  49. data/vendor/fastText/docs/unsupervised-tutorials.md +309 -0
  50. data/vendor/fastText/eval.py +95 -0
  51. data/vendor/fastText/get-wikimedia.sh +79 -0
  52. data/vendor/fastText/python/README.md +322 -0
  53. data/vendor/fastText/python/README.rst +406 -0
  54. data/vendor/fastText/python/benchmarks/README.rst +3 -0
  55. data/vendor/fastText/python/benchmarks/get_word_vector.py +49 -0
  56. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +81 -0
  57. data/vendor/fastText/python/doc/examples/bin_to_vec.py +41 -0
  58. data/vendor/fastText/python/doc/examples/compute_accuracy.py +163 -0
  59. data/vendor/fastText/python/doc/examples/get_vocab.py +48 -0
  60. data/vendor/fastText/python/doc/examples/train_supervised.py +42 -0
  61. data/vendor/fastText/python/doc/examples/train_unsupervised.py +56 -0
  62. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +468 -0
  63. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +22 -0
  64. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +388 -0
  65. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +14 -0
  66. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +239 -0
  67. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +629 -0
  68. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +13 -0
  69. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +60 -0
  70. data/vendor/fastText/quantization-example.sh +40 -0
  71. data/vendor/fastText/runtests.py +60 -0
  72. data/vendor/fastText/scripts/kbcompletion/README.md +19 -0
  73. data/vendor/fastText/scripts/kbcompletion/data.sh +69 -0
  74. data/vendor/fastText/scripts/kbcompletion/eval.cpp +108 -0
  75. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +49 -0
  76. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +45 -0
  77. data/vendor/fastText/scripts/kbcompletion/svo.sh +38 -0
  78. data/vendor/fastText/scripts/kbcompletion/wn18.sh +49 -0
  79. data/vendor/fastText/scripts/quantization/quantization-results.sh +43 -0
  80. data/vendor/fastText/setup.cfg +2 -0
  81. data/vendor/fastText/setup.py +203 -0
  82. data/vendor/fastText/src/args.cc +320 -0
  83. data/vendor/fastText/src/args.h +68 -0
  84. data/vendor/fastText/src/densematrix.cc +155 -0
  85. data/vendor/fastText/src/densematrix.h +75 -0
  86. data/vendor/fastText/src/dictionary.cc +540 -0
  87. data/vendor/fastText/src/dictionary.h +111 -0
  88. data/vendor/fastText/src/fasttext.cc +821 -0
  89. data/vendor/fastText/src/fasttext.h +191 -0
  90. data/vendor/fastText/src/loss.cc +346 -0
  91. data/vendor/fastText/src/loss.h +163 -0
  92. data/vendor/fastText/src/main.cc +435 -0
  93. data/vendor/fastText/src/matrix.cc +25 -0
  94. data/vendor/fastText/src/matrix.h +44 -0
  95. data/vendor/fastText/src/meter.cc +68 -0
  96. data/vendor/fastText/src/meter.h +69 -0
  97. data/vendor/fastText/src/model.cc +98 -0
  98. data/vendor/fastText/src/model.h +79 -0
  99. data/vendor/fastText/src/productquantizer.cc +251 -0
  100. data/vendor/fastText/src/productquantizer.h +63 -0
  101. data/vendor/fastText/src/quantmatrix.cc +117 -0
  102. data/vendor/fastText/src/quantmatrix.h +60 -0
  103. data/vendor/fastText/src/real.h +15 -0
  104. data/vendor/fastText/src/utils.cc +28 -0
  105. data/vendor/fastText/src/utils.h +43 -0
  106. data/vendor/fastText/src/vector.cc +97 -0
  107. data/vendor/fastText/src/vector.h +61 -0
  108. data/vendor/fastText/tests/fetch_test_data.sh +202 -0
  109. data/vendor/fastText/website/README.md +6 -0
  110. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +42 -0
  111. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +60 -0
  112. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +90 -0
  113. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +168 -0
  114. data/vendor/fastText/website/core/Footer.js +127 -0
  115. data/vendor/fastText/website/package.json +12 -0
  116. data/vendor/fastText/website/pages/en/index.js +286 -0
  117. data/vendor/fastText/website/sidebars.json +18 -0
  118. data/vendor/fastText/website/siteConfig.js +102 -0
  119. data/vendor/fastText/website/static/docs/en/html/annotated.html +115 -0
  120. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +4 -0
  121. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +113 -0
  122. data/vendor/fastText/website/static/docs/en/html/args_8h.html +134 -0
  123. data/vendor/fastText/website/static/docs/en/html/args_8h.js +14 -0
  124. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +139 -0
  125. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  126. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  127. data/vendor/fastText/website/static/docs/en/html/classes.html +121 -0
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +140 -0
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +753 -0
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +40 -0
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +148 -0
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +1266 -0
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +43 -0
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +145 -0
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +1149 -0
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +45 -0
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +123 -0
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +610 -0
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +23 -0
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +150 -0
  141. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +1400 -0
  142. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +48 -0
  143. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +131 -0
  144. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +950 -0
  145. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +31 -0
  146. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +122 -0
  147. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +565 -0
  148. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +22 -0
  149. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +121 -0
  150. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +542 -0
  151. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +21 -0
  152. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  153. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +116 -0
  154. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +142 -0
  155. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +10 -0
  156. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +127 -0
  157. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +145 -0
  158. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +29 -0
  159. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/doxygen.css +1596 -0
  161. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  162. data/vendor/fastText/website/static/docs/en/html/dynsections.js +97 -0
  163. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +119 -0
  164. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +168 -0
  165. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +6 -0
  166. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +155 -0
  167. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  168. data/vendor/fastText/website/static/docs/en/html/files.html +125 -0
  169. data/vendor/fastText/website/static/docs/en/html/files.js +4 -0
  170. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  171. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  172. data/vendor/fastText/website/static/docs/en/html/functions.html +139 -0
  173. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +112 -0
  174. data/vendor/fastText/website/static/docs/en/html/functions_b.html +115 -0
  175. data/vendor/fastText/website/static/docs/en/html/functions_c.html +143 -0
  176. data/vendor/fastText/website/static/docs/en/html/functions_d.html +135 -0
  177. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +27 -0
  178. data/vendor/fastText/website/static/docs/en/html/functions_e.html +115 -0
  179. data/vendor/fastText/website/static/docs/en/html/functions_f.html +112 -0
  180. data/vendor/fastText/website/static/docs/en/html/functions_func.html +563 -0
  181. data/vendor/fastText/website/static/docs/en/html/functions_g.html +145 -0
  182. data/vendor/fastText/website/static/docs/en/html/functions_h.html +112 -0
  183. data/vendor/fastText/website/static/docs/en/html/functions_i.html +121 -0
  184. data/vendor/fastText/website/static/docs/en/html/functions_k.html +106 -0
  185. data/vendor/fastText/website/static/docs/en/html/functions_l.html +140 -0
  186. data/vendor/fastText/website/static/docs/en/html/functions_m.html +153 -0
  187. data/vendor/fastText/website/static/docs/en/html/functions_n.html +164 -0
  188. data/vendor/fastText/website/static/docs/en/html/functions_o.html +116 -0
  189. data/vendor/fastText/website/static/docs/en/html/functions_p.html +161 -0
  190. data/vendor/fastText/website/static/docs/en/html/functions_q.html +135 -0
  191. data/vendor/fastText/website/static/docs/en/html/functions_r.html +116 -0
  192. data/vendor/fastText/website/static/docs/en/html/functions_s.html +159 -0
  193. data/vendor/fastText/website/static/docs/en/html/functions_t.html +138 -0
  194. data/vendor/fastText/website/static/docs/en/html/functions_u.html +106 -0
  195. data/vendor/fastText/website/static/docs/en/html/functions_v.html +106 -0
  196. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +486 -0
  197. data/vendor/fastText/website/static/docs/en/html/functions_w.html +124 -0
  198. data/vendor/fastText/website/static/docs/en/html/functions_z.html +104 -0
  199. data/vendor/fastText/website/static/docs/en/html/globals.html +170 -0
  200. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +113 -0
  201. data/vendor/fastText/website/static/docs/en/html/globals_func.html +155 -0
  202. data/vendor/fastText/website/static/docs/en/html/index.html +100 -0
  203. data/vendor/fastText/website/static/docs/en/html/jquery.js +87 -0
  204. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +582 -0
  205. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +22 -0
  206. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +114 -0
  207. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +121 -0
  208. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +123 -0
  209. data/vendor/fastText/website/static/docs/en/html/menu.js +26 -0
  210. data/vendor/fastText/website/static/docs/en/html/menudata.js +90 -0
  211. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +113 -0
  212. data/vendor/fastText/website/static/docs/en/html/model_8h.html +183 -0
  213. data/vendor/fastText/website/static/docs/en/html/model_8h.js +8 -0
  214. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +139 -0
  215. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +343 -0
  216. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +13 -0
  217. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +158 -0
  218. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +125 -0
  219. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +107 -0
  220. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +110 -0
  221. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +104 -0
  222. data/vendor/fastText/website/static/docs/en/html/namespaces.html +106 -0
  223. data/vendor/fastText/website/static/docs/en/html/namespaces.js +4 -0
  224. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  225. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  226. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  227. data/vendor/fastText/website/static/docs/en/html/navtree.css +146 -0
  228. data/vendor/fastText/website/static/docs/en/html/navtree.js +517 -0
  229. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +40 -0
  230. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +253 -0
  231. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +139 -0
  232. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  233. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +118 -0
  234. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +4 -0
  235. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +124 -0
  236. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +133 -0
  237. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +112 -0
  238. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +126 -0
  239. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +128 -0
  240. data/vendor/fastText/website/static/docs/en/html/real_8h.html +117 -0
  241. data/vendor/fastText/website/static/docs/en/html/real_8h.js +4 -0
  242. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +103 -0
  243. data/vendor/fastText/website/static/docs/en/html/resize.js +114 -0
  244. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +26 -0
  245. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +17 -0
  246. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +26 -0
  247. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +8 -0
  248. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +26 -0
  249. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +10 -0
  250. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +26 -0
  251. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +25 -0
  252. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +26 -0
  253. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +15 -0
  254. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +26 -0
  255. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +7 -0
  256. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +26 -0
  257. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +7 -0
  258. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +26 -0
  259. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +11 -0
  260. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +26 -0
  261. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +4 -0
  262. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +26 -0
  263. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +7 -0
  264. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +26 -0
  265. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +17 -0
  266. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +26 -0
  267. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +17 -0
  268. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +26 -0
  269. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +10 -0
  270. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +26 -0
  271. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +12 -0
  272. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +26 -0
  273. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +18 -0
  274. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +26 -0
  275. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +8 -0
  276. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +26 -0
  277. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +11 -0
  278. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +26 -0
  279. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +5 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +26 -0
  281. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +17 -0
  282. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +26 -0
  283. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +27 -0
  284. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +26 -0
  285. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +26 -0
  286. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +26 -0
  287. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +9 -0
  288. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +26 -0
  289. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +35 -0
  290. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +26 -0
  291. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +16 -0
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +26 -0
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +4 -0
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +26 -0
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +4 -0
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +26 -0
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +4 -0
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +26 -0
  299. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +4 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +26 -0
  301. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +5 -0
  302. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +26 -0
  303. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +4 -0
  304. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +26 -0
  305. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +4 -0
  306. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +26 -0
  307. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +4 -0
  308. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +26 -0
  309. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +4 -0
  310. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  311. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +26 -0
  312. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +5 -0
  313. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +26 -0
  314. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +4 -0
  315. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +26 -0
  316. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +4 -0
  317. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +26 -0
  318. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +4 -0
  319. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +26 -0
  320. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +4 -0
  321. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +26 -0
  322. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +4 -0
  323. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +26 -0
  324. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +4 -0
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +26 -0
  326. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +4 -0
  327. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +26 -0
  328. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +4 -0
  329. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +26 -0
  330. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +4 -0
  331. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +26 -0
  332. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +4 -0
  333. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +26 -0
  334. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +6 -0
  335. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +26 -0
  336. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +4 -0
  337. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +26 -0
  338. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +5 -0
  339. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +26 -0
  340. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +5 -0
  341. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +26 -0
  342. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +5 -0
  343. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +26 -0
  344. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +8 -0
  345. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +26 -0
  346. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +5 -0
  347. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +26 -0
  348. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +5 -0
  349. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +26 -0
  350. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +4 -0
  351. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +26 -0
  352. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +5 -0
  353. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +26 -0
  354. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +5 -0
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +26 -0
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +14 -0
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +26 -0
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +5 -0
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +26 -0
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +5 -0
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +26 -0
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +18 -0
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +26 -0
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +8 -0
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +26 -0
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +5 -0
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +26 -0
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +4 -0
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +26 -0
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +4 -0
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +26 -0
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +4 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +26 -0
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +7 -0
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +26 -0
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +11 -0
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +26 -0
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +9 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +26 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +4 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +26 -0
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +7 -0
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +26 -0
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +17 -0
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +26 -0
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +5 -0
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +26 -0
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +8 -0
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +26 -0
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +4 -0
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +26 -0
  392. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +8 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +26 -0
  394. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +10 -0
  395. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +26 -0
  396. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +10 -0
  397. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +26 -0
  398. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +6 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +26 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +26 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +26 -0
  402. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +6 -0
  403. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  404. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +26 -0
  405. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +5 -0
  406. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +12 -0
  407. data/vendor/fastText/website/static/docs/en/html/search/search.css +271 -0
  408. data/vendor/fastText/website/static/docs/en/html/search/search.js +791 -0
  409. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  410. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  411. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  412. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +42 -0
  413. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +26 -0
  414. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +4 -0
  415. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +26 -0
  416. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +4 -0
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +26 -0
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +4 -0
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +26 -0
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +6 -0
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +26 -0
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +8 -0
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +26 -0
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +11 -0
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +26 -0
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +4 -0
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +26 -0
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +10 -0
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +26 -0
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +9 -0
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +26 -0
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +9 -0
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +26 -0
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +7 -0
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +26 -0
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +4 -0
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +26 -0
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +5 -0
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +26 -0
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +5 -0
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +26 -0
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +4 -0
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +26 -0
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +10 -0
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +26 -0
  446. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +14 -0
  447. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +26 -0
  448. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +17 -0
  449. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +26 -0
  450. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +6 -0
  451. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +26 -0
  452. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +10 -0
  453. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +26 -0
  454. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +11 -0
  455. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +26 -0
  456. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +6 -0
  457. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +108 -0
  459. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +194 -0
  460. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +8 -0
  461. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +107 -0
  462. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +178 -0
  463. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +7 -0
  464. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  465. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  466. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  467. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  468. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  469. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  470. data/vendor/fastText/website/static/docs/en/html/tabs.css +1 -0
  471. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +121 -0
  472. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +5 -0
  473. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +122 -0
  474. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +5 -0
  475. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +104 -0
  476. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +121 -0
  477. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +4 -0
  478. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +126 -0
  479. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +5 -0
  480. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +120 -0
  481. data/vendor/fastText/website/static/fasttext.css +48 -0
  482. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  483. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  484. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  485. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  486. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  487. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  488. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  489. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  490. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  491. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  492. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  493. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  494. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  495. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  496. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  497. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  498. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  499. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  500. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  501. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  502. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  503. data/vendor/fastText/website/static/img/model-black.png +0 -0
  504. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  505. data/vendor/fastText/website/static/img/model-red.png +0 -0
  506. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  507. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  508. data/vendor/fastText/wikifil.pl +57 -0
  509. data/vendor/fastText/word-vector-example.sh +39 -0
  510. metadata +621 -0
@@ -0,0 +1,68 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #pragma once
10
+
11
+ #include <istream>
12
+ #include <ostream>
13
+ #include <string>
14
+ #include <vector>
15
+
16
namespace fasttext {

// Model kinds. Enumerators are numbered consecutively starting at 1.
enum class model_name : int { cbow = 1, sg = 2, sup = 3 };
// Loss kinds. Enumerators are numbered consecutively starting at 1.
enum class loss_name : int { hs = 1, ns = 2, softmax = 3, ova = 4 };

// Plain bag of settings shared by the rest of the library.
// NOTE(review): the meaning of the individual fields is defined by the
// implementation file, which is not visible here; the names appear to mirror
// the command-line options -- confirm against args.cc.
class Args {
 protected:
  // Helpers that render a setting as text.
  std::string lossToString(loss_name value) const;
  std::string boolToString(bool value) const;
  std::string modelToString(model_name value) const;

 public:
  Args();

  // Input / output locations.
  std::string input;
  std::string output;

  // Numeric settings.
  double lr;
  int lrUpdateRate;
  int dim;
  int ws;
  int epoch;
  int minCount;
  int minCountLabel;
  int neg;
  int wordNgrams;
  loss_name loss;
  model_name model;
  int bucket;
  int minn;
  int maxn;
  int thread;
  double t;
  std::string label;
  int verbose;
  std::string pretrainedVectors;
  bool saveOutput;

  // Settings whose names suggest quantization options -- TODO confirm.
  bool qout;
  bool retrain;
  bool qnorm;
  size_t cutoff;
  size_t dsub;

  // Fills the fields from a list of argument strings.
  void parseArgs(const std::vector<std::string>& args);

  // Usage printers.
  void printHelp();
  void printBasicHelp();
  void printDictionaryHelp();
  void printTrainingHelp();
  void printQuantizationHelp();

  // Stream (de)serialization and textual dump.
  void save(std::ostream& out);
  void load(std::istream& in);
  void dump(std::ostream& out) const;
};
} // namespace fasttext
@@ -0,0 +1,155 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #include "densematrix.h"
10
+
11
+ #include <exception>
12
+ #include <random>
13
+ #include <stdexcept>
14
+ #include <utility>
15
+
16
+ #include "utils.h"
17
+ #include "vector.h"
18
+
19
+ namespace fasttext {
20
+
21
+ DenseMatrix::DenseMatrix() : DenseMatrix(0, 0) {}
22
+
23
+ DenseMatrix::DenseMatrix(int64_t m, int64_t n) : Matrix(m, n), data_(m * n) {}
24
+
25
+ DenseMatrix::DenseMatrix(DenseMatrix&& other) noexcept
26
+ : Matrix(other.m_, other.n_), data_(std::move(other.data_)) {}
27
+
28
+ void DenseMatrix::zero() {
29
+ std::fill(data_.begin(), data_.end(), 0.0);
30
+ }
31
+
32
+ void DenseMatrix::uniform(real a) {
33
+ std::minstd_rand rng(1);
34
+ std::uniform_real_distribution<> uniform(-a, a);
35
+ for (int64_t i = 0; i < (m_ * n_); i++) {
36
+ data_[i] = uniform(rng);
37
+ }
38
+ }
39
+
40
+ void DenseMatrix::multiplyRow(const Vector& nums, int64_t ib, int64_t ie) {
41
+ if (ie == -1) {
42
+ ie = m_;
43
+ }
44
+ assert(ie <= nums.size());
45
+ for (auto i = ib; i < ie; i++) {
46
+ real n = nums[i - ib];
47
+ if (n != 0) {
48
+ for (auto j = 0; j < n_; j++) {
49
+ at(i, j) *= n;
50
+ }
51
+ }
52
+ }
53
+ }
54
+
55
+ void DenseMatrix::divideRow(const Vector& denoms, int64_t ib, int64_t ie) {
56
+ if (ie == -1) {
57
+ ie = m_;
58
+ }
59
+ assert(ie <= denoms.size());
60
+ for (auto i = ib; i < ie; i++) {
61
+ real n = denoms[i - ib];
62
+ if (n != 0) {
63
+ for (auto j = 0; j < n_; j++) {
64
+ at(i, j) /= n;
65
+ }
66
+ }
67
+ }
68
+ }
69
+
70
+ real DenseMatrix::l2NormRow(int64_t i) const {
71
+ auto norm = 0.0;
72
+ for (auto j = 0; j < n_; j++) {
73
+ norm += at(i, j) * at(i, j);
74
+ }
75
+ if (std::isnan(norm)) {
76
+ throw std::runtime_error("Encountered NaN.");
77
+ }
78
+ return std::sqrt(norm);
79
+ }
80
+
81
+ void DenseMatrix::l2NormRow(Vector& norms) const {
82
+ assert(norms.size() == m_);
83
+ for (auto i = 0; i < m_; i++) {
84
+ norms[i] = l2NormRow(i);
85
+ }
86
+ }
87
+
88
+ real DenseMatrix::dotRow(const Vector& vec, int64_t i) const {
89
+ assert(i >= 0);
90
+ assert(i < m_);
91
+ assert(vec.size() == n_);
92
+ real d = 0.0;
93
+ for (int64_t j = 0; j < n_; j++) {
94
+ d += at(i, j) * vec[j];
95
+ }
96
+ if (std::isnan(d)) {
97
+ throw std::runtime_error("Encountered NaN.");
98
+ }
99
+ return d;
100
+ }
101
+
102
+ void DenseMatrix::addVectorToRow(const Vector& vec, int64_t i, real a) {
103
+ assert(i >= 0);
104
+ assert(i < m_);
105
+ assert(vec.size() == n_);
106
+ for (int64_t j = 0; j < n_; j++) {
107
+ data_[i * n_ + j] += a * vec[j];
108
+ }
109
+ }
110
+
111
+ void DenseMatrix::addRowToVector(Vector& x, int32_t i) const {
112
+ assert(i >= 0);
113
+ assert(i < this->size(0));
114
+ assert(x.size() == this->size(1));
115
+ for (int64_t j = 0; j < n_; j++) {
116
+ x[j] += at(i, j);
117
+ }
118
+ }
119
+
120
+ void DenseMatrix::addRowToVector(Vector& x, int32_t i, real a) const {
121
+ assert(i >= 0);
122
+ assert(i < this->size(0));
123
+ assert(x.size() == this->size(1));
124
+ for (int64_t j = 0; j < n_; j++) {
125
+ x[j] += a * at(i, j);
126
+ }
127
+ }
128
+
129
+ void DenseMatrix::save(std::ostream& out) const {
130
+ out.write((char*)&m_, sizeof(int64_t));
131
+ out.write((char*)&n_, sizeof(int64_t));
132
+ out.write((char*)data_.data(), m_ * n_ * sizeof(real));
133
+ }
134
+
135
+ void DenseMatrix::load(std::istream& in) {
136
+ in.read((char*)&m_, sizeof(int64_t));
137
+ in.read((char*)&n_, sizeof(int64_t));
138
+ data_ = std::vector<real>(m_ * n_);
139
+ in.read((char*)data_.data(), m_ * n_ * sizeof(real));
140
+ }
141
+
142
+ void DenseMatrix::dump(std::ostream& out) const {
143
+ out << m_ << " " << n_ << std::endl;
144
+ for (int64_t i = 0; i < m_; i++) {
145
+ for (int64_t j = 0; j < n_; j++) {
146
+ if (j > 0) {
147
+ out << " ";
148
+ }
149
+ out << at(i, j);
150
+ }
151
+ out << std::endl;
152
+ }
153
+ };
154
+
155
+ } // namespace fasttext
@@ -0,0 +1,75 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #pragma once
10
+
11
+ #include <cstdint>
12
+ #include <istream>
13
+ #include <ostream>
14
+ #include <vector>
15
+
16
+ #include <assert.h>
17
+ #include "matrix.h"
18
+ #include "real.h"
19
+
20
+ namespace fasttext {
21
+
22
+ class Vector;
23
+
24
+ class DenseMatrix : public Matrix {
25
+ protected:
26
+ std::vector<real> data_;
27
+
28
+ public:
29
+ DenseMatrix();
30
+ explicit DenseMatrix(int64_t, int64_t);
31
+ DenseMatrix(const DenseMatrix&) = default;
32
+ DenseMatrix(DenseMatrix&&) noexcept;
33
+ DenseMatrix& operator=(const DenseMatrix&) = delete;
34
+ DenseMatrix& operator=(DenseMatrix&&) = delete;
35
+ virtual ~DenseMatrix() noexcept override = default;
36
+
37
+ inline real* data() {
38
+ return data_.data();
39
+ }
40
+ inline const real* data() const {
41
+ return data_.data();
42
+ }
43
+
44
+ inline const real& at(int64_t i, int64_t j) const {
45
+ assert(i * n_ + j < data_.size());
46
+ return data_[i * n_ + j];
47
+ };
48
+ inline real& at(int64_t i, int64_t j) {
49
+ return data_[i * n_ + j];
50
+ };
51
+
52
+ inline int64_t rows() const {
53
+ return m_;
54
+ }
55
+ inline int64_t cols() const {
56
+ return n_;
57
+ }
58
+ void zero();
59
+ void uniform(real);
60
+
61
+ void multiplyRow(const Vector& nums, int64_t ib = 0, int64_t ie = -1);
62
+ void divideRow(const Vector& denoms, int64_t ib = 0, int64_t ie = -1);
63
+
64
+ real l2NormRow(int64_t i) const;
65
+ void l2NormRow(Vector& norms) const;
66
+
67
+ real dotRow(const Vector&, int64_t) const override;
68
+ void addVectorToRow(const Vector&, int64_t, real) override;
69
+ void addRowToVector(Vector& x, int32_t i) const override;
70
+ void addRowToVector(Vector& x, int32_t i, real a) const override;
71
+ void save(std::ostream&) const override;
72
+ void load(std::istream&) override;
73
+ void dump(std::ostream&) const override;
74
+ };
75
+ } // namespace fasttext
@@ -0,0 +1,540 @@
1
+ /**
2
+ * Copyright (c) 2016-present, Facebook, Inc.
3
+ * All rights reserved.
4
+ *
5
+ * This source code is licensed under the MIT license found in the
6
+ * LICENSE file in the root directory of this source tree.
7
+ */
8
+
9
+ #include "dictionary.h"
10
+
11
+ #include <assert.h>
12
+
13
+ #include <algorithm>
14
+ #include <cmath>
15
+ #include <fstream>
16
+ #include <iostream>
17
+ #include <iterator>
18
+ #include <stdexcept>
19
+
20
+ namespace fasttext {
21
+
22
+ const std::string Dictionary::EOS = "</s>";
23
+ const std::string Dictionary::BOW = "<";
24
+ const std::string Dictionary::EOW = ">";
25
+
26
+ Dictionary::Dictionary(std::shared_ptr<Args> args)
27
+ : args_(args),
28
+ word2int_(MAX_VOCAB_SIZE, -1),
29
+ size_(0),
30
+ nwords_(0),
31
+ nlabels_(0),
32
+ ntokens_(0),
33
+ pruneidx_size_(-1) {}
34
+
35
+ Dictionary::Dictionary(std::shared_ptr<Args> args, std::istream& in)
36
+ : args_(args),
37
+ size_(0),
38
+ nwords_(0),
39
+ nlabels_(0),
40
+ ntokens_(0),
41
+ pruneidx_size_(-1) {
42
+ load(in);
43
+ }
44
+
45
+ int32_t Dictionary::find(const std::string& w) const {
46
+ return find(w, hash(w));
47
+ }
48
+
49
+ int32_t Dictionary::find(const std::string& w, uint32_t h) const {
50
+ int32_t word2intsize = word2int_.size();
51
+ int32_t id = h % word2intsize;
52
+ while (word2int_[id] != -1 && words_[word2int_[id]].word != w) {
53
+ id = (id + 1) % word2intsize;
54
+ }
55
+ return id;
56
+ }
57
+
58
+ void Dictionary::add(const std::string& w) {
59
+ int32_t h = find(w);
60
+ ntokens_++;
61
+ if (word2int_[h] == -1) {
62
+ entry e;
63
+ e.word = w;
64
+ e.count = 1;
65
+ e.type = getType(w);
66
+ words_.push_back(e);
67
+ word2int_[h] = size_++;
68
+ } else {
69
+ words_[word2int_[h]].count++;
70
+ }
71
+ }
72
+
73
+ int32_t Dictionary::nwords() const {
74
+ return nwords_;
75
+ }
76
+
77
+ int32_t Dictionary::nlabels() const {
78
+ return nlabels_;
79
+ }
80
+
81
+ int64_t Dictionary::ntokens() const {
82
+ return ntokens_;
83
+ }
84
+
85
+ const std::vector<int32_t>& Dictionary::getSubwords(int32_t i) const {
86
+ assert(i >= 0);
87
+ assert(i < nwords_);
88
+ return words_[i].subwords;
89
+ }
90
+
91
+ const std::vector<int32_t> Dictionary::getSubwords(
92
+ const std::string& word) const {
93
+ int32_t i = getId(word);
94
+ if (i >= 0) {
95
+ return getSubwords(i);
96
+ }
97
+ std::vector<int32_t> ngrams;
98
+ if (word != EOS) {
99
+ computeSubwords(BOW + word + EOW, ngrams);
100
+ }
101
+ return ngrams;
102
+ }
103
+
104
+ void Dictionary::getSubwords(
105
+ const std::string& word,
106
+ std::vector<int32_t>& ngrams,
107
+ std::vector<std::string>& substrings) const {
108
+ int32_t i = getId(word);
109
+ ngrams.clear();
110
+ substrings.clear();
111
+ if (i >= 0) {
112
+ ngrams.push_back(i);
113
+ substrings.push_back(words_[i].word);
114
+ }
115
+ if (word != EOS) {
116
+ computeSubwords(BOW + word + EOW, ngrams, &substrings);
117
+ }
118
+ }
119
+
120
+ bool Dictionary::discard(int32_t id, real rand) const {
121
+ assert(id >= 0);
122
+ assert(id < nwords_);
123
+ if (args_->model == model_name::sup) {
124
+ return false;
125
+ }
126
+ return rand > pdiscard_[id];
127
+ }
128
+
129
+ int32_t Dictionary::getId(const std::string& w, uint32_t h) const {
130
+ int32_t id = find(w, h);
131
+ return word2int_[id];
132
+ }
133
+
134
+ int32_t Dictionary::getId(const std::string& w) const {
135
+ int32_t h = find(w);
136
+ return word2int_[h];
137
+ }
138
+
139
+ entry_type Dictionary::getType(int32_t id) const {
140
+ assert(id >= 0);
141
+ assert(id < size_);
142
+ return words_[id].type;
143
+ }
144
+
145
+ entry_type Dictionary::getType(const std::string& w) const {
146
+ return (w.find(args_->label) == 0) ? entry_type::label : entry_type::word;
147
+ }
148
+
149
+ std::string Dictionary::getWord(int32_t id) const {
150
+ assert(id >= 0);
151
+ assert(id < size_);
152
+ return words_[id].word;
153
+ }
154
+
155
+ // The correct implementation of fnv should be:
156
+ // h = h ^ uint32_t(uint8_t(str[i]));
157
+ // Unfortunately, earlier version of fasttext used
158
+ // h = h ^ uint32_t(str[i]);
159
+ // which is undefined behavior (as char can be signed or unsigned).
160
+ // Since all fasttext models that were already released were trained
161
+ // using signed char, we fixed the hash function to make models
162
+ // compatible whatever compiler is used.
163
+ uint32_t Dictionary::hash(const std::string& str) const {
164
+ uint32_t h = 2166136261;
165
+ for (size_t i = 0; i < str.size(); i++) {
166
+ h = h ^ uint32_t(int8_t(str[i]));
167
+ h = h * 16777619;
168
+ }
169
+ return h;
170
+ }
171
+
172
+ void Dictionary::computeSubwords(
173
+ const std::string& word,
174
+ std::vector<int32_t>& ngrams,
175
+ std::vector<std::string>* substrings) const {
176
+ for (size_t i = 0; i < word.size(); i++) {
177
+ std::string ngram;
178
+ if ((word[i] & 0xC0) == 0x80) {
179
+ continue;
180
+ }
181
+ for (size_t j = i, n = 1; j < word.size() && n <= args_->maxn; n++) {
182
+ ngram.push_back(word[j++]);
183
+ while (j < word.size() && (word[j] & 0xC0) == 0x80) {
184
+ ngram.push_back(word[j++]);
185
+ }
186
+ if (n >= args_->minn && !(n == 1 && (i == 0 || j == word.size()))) {
187
+ int32_t h = hash(ngram) % args_->bucket;
188
+ pushHash(ngrams, h);
189
+ if (substrings) {
190
+ substrings->push_back(ngram);
191
+ }
192
+ }
193
+ }
194
+ }
195
+ }
196
+
197
+ void Dictionary::initNgrams() {
198
+ for (size_t i = 0; i < size_; i++) {
199
+ std::string word = BOW + words_[i].word + EOW;
200
+ words_[i].subwords.clear();
201
+ words_[i].subwords.push_back(i);
202
+ if (words_[i].word != EOS) {
203
+ computeSubwords(word, words_[i].subwords);
204
+ }
205
+ }
206
+ }
207
+
208
+ bool Dictionary::readWord(std::istream& in, std::string& word) const {
209
+ int c;
210
+ std::streambuf& sb = *in.rdbuf();
211
+ word.clear();
212
+ while ((c = sb.sbumpc()) != EOF) {
213
+ if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' ||
214
+ c == '\f' || c == '\0') {
215
+ if (word.empty()) {
216
+ if (c == '\n') {
217
+ word += EOS;
218
+ return true;
219
+ }
220
+ continue;
221
+ } else {
222
+ if (c == '\n')
223
+ sb.sungetc();
224
+ return true;
225
+ }
226
+ }
227
+ word.push_back(c);
228
+ }
229
+ // trigger eofbit
230
+ in.get();
231
+ return !word.empty();
232
+ }
233
+
234
+ void Dictionary::readFromFile(std::istream& in) {
235
+ std::string word;
236
+ int64_t minThreshold = 1;
237
+ while (readWord(in, word)) {
238
+ add(word);
239
+ if (ntokens_ % 1000000 == 0 && args_->verbose > 1) {
240
+ std::cerr << "\rRead " << ntokens_ / 1000000 << "M words" << std::flush;
241
+ }
242
+ if (size_ > 0.75 * MAX_VOCAB_SIZE) {
243
+ minThreshold++;
244
+ threshold(minThreshold, minThreshold);
245
+ }
246
+ }
247
+ threshold(args_->minCount, args_->minCountLabel);
248
+ initTableDiscard();
249
+ initNgrams();
250
+ if (args_->verbose > 0) {
251
+ std::cerr << "\rRead " << ntokens_ / 1000000 << "M words" << std::endl;
252
+ std::cerr << "Number of words: " << nwords_ << std::endl;
253
+ std::cerr << "Number of labels: " << nlabels_ << std::endl;
254
+ }
255
+ if (size_ == 0) {
256
+ throw std::invalid_argument(
257
+ "Empty vocabulary. Try a smaller -minCount value.");
258
+ }
259
+ }
260
+
261
+ void Dictionary::threshold(int64_t t, int64_t tl) {
262
+ sort(words_.begin(), words_.end(), [](const entry& e1, const entry& e2) {
263
+ if (e1.type != e2.type) {
264
+ return e1.type < e2.type;
265
+ }
266
+ return e1.count > e2.count;
267
+ });
268
+ words_.erase(
269
+ remove_if(
270
+ words_.begin(),
271
+ words_.end(),
272
+ [&](const entry& e) {
273
+ return (e.type == entry_type::word && e.count < t) ||
274
+ (e.type == entry_type::label && e.count < tl);
275
+ }),
276
+ words_.end());
277
+ words_.shrink_to_fit();
278
+ size_ = 0;
279
+ nwords_ = 0;
280
+ nlabels_ = 0;
281
+ std::fill(word2int_.begin(), word2int_.end(), -1);
282
+ for (auto it = words_.begin(); it != words_.end(); ++it) {
283
+ int32_t h = find(it->word);
284
+ word2int_[h] = size_++;
285
+ if (it->type == entry_type::word) {
286
+ nwords_++;
287
+ }
288
+ if (it->type == entry_type::label) {
289
+ nlabels_++;
290
+ }
291
+ }
292
+ }
293
+
294
+ void Dictionary::initTableDiscard() {
295
+ pdiscard_.resize(size_);
296
+ for (size_t i = 0; i < size_; i++) {
297
+ real f = real(words_[i].count) / real(ntokens_);
298
+ pdiscard_[i] = std::sqrt(args_->t / f) + args_->t / f;
299
+ }
300
+ }
301
+
302
+ std::vector<int64_t> Dictionary::getCounts(entry_type type) const {
303
+ std::vector<int64_t> counts;
304
+ for (auto& w : words_) {
305
+ if (w.type == type) {
306
+ counts.push_back(w.count);
307
+ }
308
+ }
309
+ return counts;
310
+ }
311
+
312
+ void Dictionary::addWordNgrams(
313
+ std::vector<int32_t>& line,
314
+ const std::vector<int32_t>& hashes,
315
+ int32_t n) const {
316
+ for (int32_t i = 0; i < hashes.size(); i++) {
317
+ uint64_t h = hashes[i];
318
+ for (int32_t j = i + 1; j < hashes.size() && j < i + n; j++) {
319
+ h = h * 116049371 + hashes[j];
320
+ pushHash(line, h % args_->bucket);
321
+ }
322
+ }
323
+ }
324
+
325
+ void Dictionary::addSubwords(
326
+ std::vector<int32_t>& line,
327
+ const std::string& token,
328
+ int32_t wid) const {
329
+ if (wid < 0) { // out of vocab
330
+ if (token != EOS) {
331
+ computeSubwords(BOW + token + EOW, line);
332
+ }
333
+ } else {
334
+ if (args_->maxn <= 0) { // in vocab w/o subwords
335
+ line.push_back(wid);
336
+ } else { // in vocab w/ subwords
337
+ const std::vector<int32_t>& ngrams = getSubwords(wid);
338
+ line.insert(line.end(), ngrams.cbegin(), ngrams.cend());
339
+ }
340
+ }
341
+ }
342
+
343
+ void Dictionary::reset(std::istream& in) const {
344
+ if (in.eof()) {
345
+ in.clear();
346
+ in.seekg(std::streampos(0));
347
+ }
348
+ }
349
+
350
+ int32_t Dictionary::getLine(
351
+ std::istream& in,
352
+ std::vector<int32_t>& words,
353
+ std::minstd_rand& rng) const {
354
+ std::uniform_real_distribution<> uniform(0, 1);
355
+ std::string token;
356
+ int32_t ntokens = 0;
357
+
358
+ reset(in);
359
+ words.clear();
360
+ while (readWord(in, token)) {
361
+ int32_t h = find(token);
362
+ int32_t wid = word2int_[h];
363
+ if (wid < 0) {
364
+ continue;
365
+ }
366
+
367
+ ntokens++;
368
+ if (getType(wid) == entry_type::word && !discard(wid, uniform(rng))) {
369
+ words.push_back(wid);
370
+ }
371
+ if (ntokens > MAX_LINE_SIZE || token == EOS) {
372
+ break;
373
+ }
374
+ }
375
+ return ntokens;
376
+ }
377
+
378
+ int32_t Dictionary::getLine(
379
+ std::istream& in,
380
+ std::vector<int32_t>& words,
381
+ std::vector<int32_t>& labels) const {
382
+ std::vector<int32_t> word_hashes;
383
+ std::string token;
384
+ int32_t ntokens = 0;
385
+
386
+ reset(in);
387
+ words.clear();
388
+ labels.clear();
389
+ while (readWord(in, token)) {
390
+ uint32_t h = hash(token);
391
+ int32_t wid = getId(token, h);
392
+ entry_type type = wid < 0 ? getType(token) : getType(wid);
393
+
394
+ ntokens++;
395
+ if (type == entry_type::word) {
396
+ addSubwords(words, token, wid);
397
+ word_hashes.push_back(h);
398
+ } else if (type == entry_type::label && wid >= 0) {
399
+ labels.push_back(wid - nwords_);
400
+ }
401
+ if (token == EOS) {
402
+ break;
403
+ }
404
+ }
405
+ addWordNgrams(words, word_hashes, args_->wordNgrams);
406
+ return ntokens;
407
+ }
408
+
409
+ void Dictionary::pushHash(std::vector<int32_t>& hashes, int32_t id) const {
410
+ if (pruneidx_size_ == 0 || id < 0) {
411
+ return;
412
+ }
413
+ if (pruneidx_size_ > 0) {
414
+ if (pruneidx_.count(id)) {
415
+ id = pruneidx_.at(id);
416
+ } else {
417
+ return;
418
+ }
419
+ }
420
+ hashes.push_back(nwords_ + id);
421
+ }
422
+
423
+ std::string Dictionary::getLabel(int32_t lid) const {
424
+ if (lid < 0 || lid >= nlabels_) {
425
+ throw std::invalid_argument(
426
+ "Label id is out of range [0, " + std::to_string(nlabels_) + "]");
427
+ }
428
+ return words_[lid + nwords_].word;
429
+ }
430
+
431
+ void Dictionary::save(std::ostream& out) const {
432
+ out.write((char*)&size_, sizeof(int32_t));
433
+ out.write((char*)&nwords_, sizeof(int32_t));
434
+ out.write((char*)&nlabels_, sizeof(int32_t));
435
+ out.write((char*)&ntokens_, sizeof(int64_t));
436
+ out.write((char*)&pruneidx_size_, sizeof(int64_t));
437
+ for (int32_t i = 0; i < size_; i++) {
438
+ entry e = words_[i];
439
+ out.write(e.word.data(), e.word.size() * sizeof(char));
440
+ out.put(0);
441
+ out.write((char*)&(e.count), sizeof(int64_t));
442
+ out.write((char*)&(e.type), sizeof(entry_type));
443
+ }
444
+ for (const auto pair : pruneidx_) {
445
+ out.write((char*)&(pair.first), sizeof(int32_t));
446
+ out.write((char*)&(pair.second), sizeof(int32_t));
447
+ }
448
+ }
449
+
450
+ void Dictionary::load(std::istream& in) {
451
+ words_.clear();
452
+ in.read((char*)&size_, sizeof(int32_t));
453
+ in.read((char*)&nwords_, sizeof(int32_t));
454
+ in.read((char*)&nlabels_, sizeof(int32_t));
455
+ in.read((char*)&ntokens_, sizeof(int64_t));
456
+ in.read((char*)&pruneidx_size_, sizeof(int64_t));
457
+ for (int32_t i = 0; i < size_; i++) {
458
+ char c;
459
+ entry e;
460
+ while ((c = in.get()) != 0) {
461
+ e.word.push_back(c);
462
+ }
463
+ in.read((char*)&e.count, sizeof(int64_t));
464
+ in.read((char*)&e.type, sizeof(entry_type));
465
+ words_.push_back(e);
466
+ }
467
+ pruneidx_.clear();
468
+ for (int32_t i = 0; i < pruneidx_size_; i++) {
469
+ int32_t first;
470
+ int32_t second;
471
+ in.read((char*)&first, sizeof(int32_t));
472
+ in.read((char*)&second, sizeof(int32_t));
473
+ pruneidx_[first] = second;
474
+ }
475
+ initTableDiscard();
476
+ initNgrams();
477
+
478
+ int32_t word2intsize = std::ceil(size_ / 0.7);
479
+ word2int_.assign(word2intsize, -1);
480
+ for (int32_t i = 0; i < size_; i++) {
481
+ word2int_[find(words_[i].word)] = i;
482
+ }
483
+ }
484
+
485
+ void Dictionary::init() {
486
+ initTableDiscard();
487
+ initNgrams();
488
+ }
489
+
490
+ void Dictionary::prune(std::vector<int32_t>& idx) {
491
+ std::vector<int32_t> words, ngrams;
492
+ for (auto it = idx.cbegin(); it != idx.cend(); ++it) {
493
+ if (*it < nwords_) {
494
+ words.push_back(*it);
495
+ } else {
496
+ ngrams.push_back(*it);
497
+ }
498
+ }
499
+ std::sort(words.begin(), words.end());
500
+ idx = words;
501
+
502
+ if (ngrams.size() != 0) {
503
+ int32_t j = 0;
504
+ for (const auto ngram : ngrams) {
505
+ pruneidx_[ngram - nwords_] = j;
506
+ j++;
507
+ }
508
+ idx.insert(idx.end(), ngrams.begin(), ngrams.end());
509
+ }
510
+ pruneidx_size_ = pruneidx_.size();
511
+
512
+ std::fill(word2int_.begin(), word2int_.end(), -1);
513
+
514
+ int32_t j = 0;
515
+ for (int32_t i = 0; i < words_.size(); i++) {
516
+ if (getType(i) == entry_type::label ||
517
+ (j < words.size() && words[j] == i)) {
518
+ words_[j] = words_[i];
519
+ word2int_[find(words_[j].word)] = j;
520
+ j++;
521
+ }
522
+ }
523
+ nwords_ = words.size();
524
+ size_ = nwords_ + nlabels_;
525
+ words_.erase(words_.begin() + size_, words_.end());
526
+ initNgrams();
527
+ }
528
+
529
+ void Dictionary::dump(std::ostream& out) const {
530
+ out << words_.size() << std::endl;
531
+ for (auto it : words_) {
532
+ std::string entryType = "word";
533
+ if (it.type == entry_type::label) {
534
+ entryType = "label";
535
+ }
536
+ out << it.word << " " << it.count << " " << entryType << std::endl;
537
+ }
538
+ }
539
+
540
+ } // namespace fasttext