fasttext 0.1.0 → 0.2.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (498)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
Binary file
@@ -1,68 +0,0 @@
1
- #
2
- # Copyright (c) 2016-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the MIT license found in the
6
- # LICENSE file in the root directory of this source tree.
7
- #
8
-
9
- cmake_minimum_required(VERSION 2.8.9)
10
- project(fasttext)
11
-
12
- # The version number.
13
- set (fasttext_VERSION_MAJOR 0)
14
- set (fasttext_VERSION_MINOR 1)
15
-
16
- include_directories(fasttext)
17
-
18
- set(CMAKE_CXX_FLAGS " -pthread -std=c++11 -funroll-loops -O3 -march=native")
19
-
20
- set(HEADER_FILES
21
- src/args.h
22
- src/densematrix.h
23
- src/dictionary.h
24
- src/fasttext.h
25
- src/loss.h
26
- src/matrix.h
27
- src/meter.h
28
- src/model.h
29
- src/productquantizer.h
30
- src/quantmatrix.h
31
- src/real.h
32
- src/utils.h
33
- src/vector.h)
34
-
35
- set(SOURCE_FILES
36
- src/args.cc
37
- src/densematrix.cc
38
- src/dictionary.cc
39
- src/fasttext.cc
40
- src/loss.cc
41
- src/main.cc
42
- src/matrix.cc
43
- src/meter.cc
44
- src/model.cc
45
- src/productquantizer.cc
46
- src/quantmatrix.cc
47
- src/utils.cc
48
- src/vector.cc)
49
-
50
- add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
51
- add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
52
- add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
53
- set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
54
- set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
55
- set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
56
- POSITION_INDEPENDENT_CODE True)
57
- add_executable(fasttext-bin src/main.cc)
58
- target_link_libraries(fasttext-bin pthread fasttext-static)
59
- set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
60
- install (TARGETS fasttext-shared
61
- LIBRARY DESTINATION lib)
62
- install (TARGETS fasttext-static
63
- ARCHIVE DESTINATION lib)
64
- install (TARGETS fasttext-static_pic
65
- ARCHIVE DESTINATION lib)
66
- install (TARGETS fasttext-bin
67
- RUNTIME DESTINATION bin
68
- PUBLIC_HEADER DESTINATION include/fasttext)
@@ -1,2 +0,0 @@
1
- # Code of Conduct
2
- Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://code.fb.com/codeofconduct) so that you can understand what actions will and will not be tolerated.
@@ -1,32 +0,0 @@
1
- # Contributing to fastText
2
- We want to make contributing to this project as easy and transparent as possible.
3
-
4
- ## Issues
5
- We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue.
6
-
7
- ### Reproducing issues
8
- Please make sure that the issue you mention is not a result of one of the existing third-party libraries. For example, please do not post an issue if you encountered an error within a third-party Python library. We can only help you with errors which can be directly reproduced either with our C++ code or the corresponding Python bindings. If you do find an error, please post detailed steps to reproduce it. If we can't reproduce your error, we can't help you fix it.
9
-
10
- ## Pull Requests
11
- Please post an Issue before submitting a pull request. This might save you some time, as it is possible we can't support your contribution, although we try our best to accommodate your (planned) work and highly appreciate your time. Generally, it is best to have a pull request emerge from an issue rather than the other way around.
12
-
13
- To create a pull request:
14
-
15
- 1. Fork the repo and create your branch from `master`.
16
- 2. If you've added code that should be tested, add tests.
17
- 3. If you've changed APIs, update the documentation.
18
- 4. Ensure the test suite passes.
19
- 5. Make sure your code lints.
20
- 6. If you haven't already, complete the Contributor License Agreement ("CLA").
21
-
22
- ## Tests
23
- First, you will need to make sure you have the required data. For that, please have a look at the fetch_test_data.sh script under tests. Next, run the tests using the runtests.py script, passing a path to the directory containing the datasets.
24
-
25
- ## Contributor License Agreement ("CLA")
26
- In order to accept your pull request, we need you to submit a CLA. You only need
27
- to do this once to work on any of Facebook's open source projects.
28
-
29
- Complete your CLA here: <https://code.facebook.com/cla>
30
-
31
- ## License
32
- By contributing to fastText, you agree that your contributions will be licensed under its MIT license.
@@ -1,5 +0,0 @@
1
- include LICENSE
2
- include PATENTS
3
-
4
- recursive-include python *.md *.rst
5
- recursive-include src *.h
@@ -1,63 +0,0 @@
1
- #
2
- # Copyright (c) 2016-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the MIT license found in the
6
- # LICENSE file in the root directory of this source tree.
7
- #
8
-
9
- CXX = c++
10
- CXXFLAGS = -pthread -std=c++0x -march=native
11
- OBJS = args.o matrix.o dictionary.o loss.o productquantizer.o densematrix.o quantmatrix.o vector.o model.o utils.o meter.o fasttext.o
12
- INCLUDES = -I.
13
-
14
- opt: CXXFLAGS += -O3 -funroll-loops -DNDEBUG
15
- opt: fasttext
16
-
17
- coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage
18
- coverage: fasttext
19
-
20
- debug: CXXFLAGS += -g -O0 -fno-inline
21
- debug: fasttext
22
-
23
- args.o: src/args.cc src/args.h
24
- $(CXX) $(CXXFLAGS) -c src/args.cc
25
-
26
- matrix.o: src/matrix.cc src/matrix.h
27
- $(CXX) $(CXXFLAGS) -c src/matrix.cc
28
-
29
- dictionary.o: src/dictionary.cc src/dictionary.h src/args.h
30
- $(CXX) $(CXXFLAGS) -c src/dictionary.cc
31
-
32
- loss.o: src/loss.cc src/loss.h src/matrix.h src/real.h
33
- $(CXX) $(CXXFLAGS) -c src/loss.cc
34
-
35
- productquantizer.o: src/productquantizer.cc src/productquantizer.h src/utils.h
36
- $(CXX) $(CXXFLAGS) -c src/productquantizer.cc
37
-
38
- densematrix.o: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
39
- $(CXX) $(CXXFLAGS) -c src/densematrix.cc
40
-
41
- quantmatrix.o: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
42
- $(CXX) $(CXXFLAGS) -c src/quantmatrix.cc
43
-
44
- vector.o: src/vector.cc src/vector.h src/utils.h
45
- $(CXX) $(CXXFLAGS) -c src/vector.cc
46
-
47
- model.o: src/model.cc src/model.h src/args.h
48
- $(CXX) $(CXXFLAGS) -c src/model.cc
49
-
50
- utils.o: src/utils.cc src/utils.h
51
- $(CXX) $(CXXFLAGS) -c src/utils.cc
52
-
53
- meter.o: src/meter.cc src/meter.h
54
- $(CXX) $(CXXFLAGS) -c src/meter.cc
55
-
56
- fasttext.o: src/fasttext.cc src/*.h
57
- $(CXX) $(CXXFLAGS) -c src/fasttext.cc
58
-
59
- fasttext: $(OBJS) src/fasttext.cc
60
- $(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
61
-
62
- clean:
63
- rm -rf *.o *.gcno *.gcda fasttext
@@ -1,53 +0,0 @@
1
- ## Alignment of Word Embeddings
2
-
3
- This directory provides code for learning alignments between word embeddings in different languages.
4
-
5
- The code is in Python 3 and requires [NumPy](http://www.numpy.org/).
6
-
7
- The script `example.sh` shows how to use this code to learn and evaluate a bilingual alignment of word embeddings.
8
-
9
- The word embeddings used in [1] can be found on the [fastText project page](https://fasttext.cc) and the supervised bilingual lexicons on the [MUSE project page](https://github.com/facebookresearch/MUSE).
10
-
11
- ### Supervised alignment
12
-
13
- The script `align.py` aligns word embeddings from two languages using a bilingual lexicon as supervision.
14
- The details of this approach can be found in [1].
15
-
16
- ### Unsupervised alignment
17
-
18
- The script `unsup_align.py` aligns word embeddings from two languages without requiring any supervision.
19
- The details of this approach can be found in [2].
20
-
21
- In addition to NumPy, the unsupervised method requires the [Python Optimal Transport](https://pot.readthedocs.io/en/stable/) toolbox.
22
-
23
- ### Download
24
-
25
- Wikipedia fastText embeddings aligned with our method can be found [here](https://fasttext.cc/docs/en/aligned-vectors.html).
26
-
27
- ### References
28
-
29
- If you use the supervised alignment method, please cite:
30
-
31
- [1] A. Joulin, P. Bojanowski, T. Mikolov, H. Jegou, E. Grave, [*Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion*](https://arxiv.org/abs/1804.07745)
32
-
33
- ```
34
- @InProceedings{joulin2018loss,
35
- title={Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion},
36
- author={Joulin, Armand and Bojanowski, Piotr and Mikolov, Tomas and J\'egou, Herv\'e and Grave, Edouard},
37
- year={2018},
38
- booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
39
- }
40
- ```
41
-
42
- If you use the unsupervised alignment method, please cite:
43
-
44
- [2] E. Grave, A. Joulin, Q. Berthet, [*Unsupervised Alignment of Embeddings with Wasserstein Procrustes*](https://arxiv.org/abs/1805.11222)
45
-
46
- ```
47
- @article{grave2018unsupervised,
48
- title={Unsupervised Alignment of Embeddings with Wasserstein Procrustes},
49
- author={Grave, Edouard and Joulin, Armand and Berthet, Quentin},
50
- journal={arXiv preprint arXiv:1805.11222},
51
- year={2018}
52
- }
53
- ```
@@ -1,145 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- #
4
- # Copyright (c) 2018-present, Facebook, Inc.
5
- # All rights reserved.
6
- #
7
- # This source code is licensed under the license found in the
8
- # LICENSE file in the root directory of this source tree.
9
-
10
- import numpy as np
11
- import argparse
12
- from utils import *
13
- import sys
14
-
15
- parser = argparse.ArgumentParser(description='RCSLS for supervised word alignment')
16
-
17
- parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
18
- parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
19
- parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
20
-
21
- parser.add_argument("--dico_train", type=str, default='', help="train dictionary")
22
- parser.add_argument("--dico_test", type=str, default='', help="validation dictionary")
23
-
24
- parser.add_argument("--output", type=str, default='', help="where to save aligned embeddings")
25
-
26
- parser.add_argument("--knn", type=int, default=10, help="number of nearest neighbors in RCSL/CSLS")
27
- parser.add_argument("--maxneg", type=int, default=200000, help="Maximum number of negatives for the Extended RCSLS")
28
- parser.add_argument("--maxsup", type=int, default=-1, help="Maximum number of training examples")
29
- parser.add_argument("--maxload", type=int, default=200000, help="Maximum number of loaded vectors")
30
-
31
- parser.add_argument("--model", type=str, default="none", help="Set of constraints: spectral or none")
32
- parser.add_argument("--reg", type=float, default=0.0 , help='regularization parameters')
33
-
34
- parser.add_argument("--lr", type=float, default=1.0, help='learning rate')
35
- parser.add_argument("--niter", type=int, default=10, help='number of iterations')
36
- parser.add_argument('--sgd', action='store_true', help='use sgd')
37
- parser.add_argument("--batchsize", type=int, default=10000, help="batch size for sgd")
38
-
39
- params = parser.parse_args()
40
-
41
- ###### SPECIFIC FUNCTIONS ######
42
- # functions specific to RCSLS
43
- # the rest of the functions are in utils.py
44
-
45
- def getknn(sc, x, y, k=10):
46
- sidx = np.argpartition(sc, -k, axis=1)[:, -k:]
47
- ytopk = y[sidx.flatten(), :]
48
- ytopk = ytopk.reshape(sidx.shape[0], sidx.shape[1], y.shape[1])
49
- f = np.sum(sc[np.arange(sc.shape[0])[:, None], sidx])
50
- df = np.dot(ytopk.sum(1).T, x)
51
- return f / k, df / k
52
-
53
-
54
- def rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, knn=10):
55
- X_trans = np.dot(X_src, R.T)
56
- f = 2 * np.sum(X_trans * Y_tgt)
57
- df = 2 * np.dot(Y_tgt.T, X_src)
58
- fk0, dfk0 = getknn(np.dot(X_trans, Z_tgt.T), X_src, Z_tgt, knn)
59
- fk1, dfk1 = getknn(np.dot(np.dot(Z_src, R.T), Y_tgt.T).T, Y_tgt, Z_src, knn)
60
- f = f - fk0 -fk1
61
- df = df - dfk0 - dfk1.T
62
- return -f / X_src.shape[0], -df / X_src.shape[0]
63
-
64
-
65
- def proj_spectral(R):
66
- U, s, V = np.linalg.svd(R)
67
- s[s > 1] = 1
68
- s[s < 0] = 0
69
- return np.dot(U, np.dot(np.diag(s), V))
70
-
71
-
72
- ###### MAIN ######
73
-
74
- # load word embeddings
75
- words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
76
- words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
77
-
78
- # load validation bilingual lexicon
79
- src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
80
-
81
- # word --> vector indices
82
- idx_src = idx(words_src)
83
- idx_tgt = idx(words_tgt)
84
-
85
- # load train bilingual lexicon
86
- pairs = load_pairs(params.dico_train, idx_src, idx_tgt)
87
- if params.maxsup > 0 and params.maxsup < len(pairs):
88
- pairs = pairs[:params.maxsup]
89
-
90
- # selecting training vector pairs
91
- X_src, Y_tgt = select_vectors_from_pairs(x_src, x_tgt, pairs)
92
-
93
- # adding negatives for RCSLS
94
- Z_src = x_src[:params.maxneg, :]
95
- Z_tgt = x_tgt[:params.maxneg, :]
96
-
97
- # initialization:
98
- R = procrustes(X_src, Y_tgt)
99
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
100
- print("[init -- Procrustes] NN: %.4f"%(nnacc))
101
- sys.stdout.flush()
102
-
103
- # optimization
104
- fold, Rold = 0, []
105
- niter, lr = params.niter, params.lr
106
-
107
- for it in range(0, niter + 1):
108
- if lr < 1e-4:
109
- break
110
-
111
- if params.sgd:
112
- indices = np.random.choice(X_src.shape[0], size=params.batchsize, replace=False)
113
- f, df = rcsls(X_src[indices, :], Y_tgt[indices, :], Z_src, Z_tgt, R, params.knn)
114
- else:
115
- f, df = rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, params.knn)
116
-
117
- if params.reg > 0:
118
- R *= (1 - lr * params.reg)
119
- R -= lr * df
120
- if params.model == "spectral":
121
- R = proj_spectral(R)
122
-
123
- print("[it=%d] f = %.4f" % (it, f))
124
- sys.stdout.flush()
125
-
126
- if f > fold and it > 0 and not params.sgd:
127
- lr /= 2
128
- f, R = fold, Rold
129
-
130
- fold, Rold = f, R
131
-
132
- if (it > 0 and it % 10 == 0) or it == niter:
133
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
134
- print("[it=%d] NN = %.4f - Coverage = %.4f" % (it, nnacc, len(src2tgt) / lexicon_size))
135
-
136
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
137
- print("[final] NN = %.4f - Coverage = %.4f" % (nnacc, len(src2tgt) / lexicon_size))
138
-
139
- if params.output != "":
140
- print("Saving all aligned vectors at %s" % params.output)
141
- words_full, x_full = load_vectors(params.src_emb, maxload=-1, center=params.center, verbose=False)
142
- x = np.dot(x_full, R.T)
143
- x /= np.linalg.norm(x, axis=1)[:, np.newaxis] + 1e-8
144
- save_vectors(params.output, x, words_full)
145
- save_matrix(params.output + "-mat", R)
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- #
4
- # Copyright (c) 2018-present, Facebook, Inc.
5
- # All rights reserved.
6
- #
7
- # This source code is licensed under the license found in the
8
- # LICENSE file in the root directory of this source tree.
9
-
10
- import io
11
- import numpy as np
12
- import argparse
13
- from utils import *
14
-
15
- parser = argparse.ArgumentParser(description='Evaluation of word alignment')
16
- parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
17
- parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
18
- parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
19
- parser.add_argument("--src_mat", type=str, default='', help="Load source alignment matrix. If none given, the aligment matrix is the identity.")
20
- parser.add_argument("--tgt_mat", type=str, default='', help="Load target alignment matrix. If none given, the aligment matrix is the identity.")
21
- parser.add_argument("--dico_test", type=str, default='', help="test dictionary")
22
- parser.add_argument("--maxload", type=int, default=200000)
23
- parser.add_argument("--nomatch", action='store_true', help="no exact match in lexicon")
24
- params = parser.parse_args()
25
-
26
-
27
- ###### SPECIFIC FUNCTIONS ######
28
- # function specific to evaluation
29
- # the rest of the functions are in utils.py
30
-
31
- def load_transform(fname, d1=300, d2=300):
32
- fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
33
- R = np.zeros([d1, d2])
34
- for i, line in enumerate(fin):
35
- tokens = line.split(' ')
36
- R[i, :] = np.array(tokens[0:d2], dtype=float)
37
- return R
38
-
39
-
40
- ###### MAIN ######
41
-
42
- print("Evaluation of alignment on %s" % params.dico_test)
43
- if params.nomatch:
44
- print("running without exact string matches")
45
-
46
- words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
47
- words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
48
-
49
- if params.tgt_mat != "":
50
- R_tgt = load_transform(params.tgt_mat)
51
- x_tgt = np.dot(x_tgt, R_tgt)
52
- if params.src_mat != "":
53
- R_src = load_transform(params.src_mat)
54
- x_src = np.dot(x_src, R_src)
55
-
56
- src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
57
-
58
- nnacc = compute_nn_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
59
- cslsproc = compute_csls_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
60
- print("NN = %.4f - CSLS = %.4f - Coverage = %.4f" % (nnacc, cslsproc, len(src2tgt) / lexicon_size))
@@ -1,51 +0,0 @@
1
- #!/bin/usr/env sh
2
- # Copyright (c) 2018-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the license found in the
6
- # LICENSE file in the root directory of this source tree.
7
-
8
- set -e
9
- s=${1:-en}
10
- t=${2:-es}
11
- echo "Example based on the ${s}->${t} alignment"
12
-
13
- if [ ! -d data/ ]; then
14
- mkdir -p data;
15
- fi
16
-
17
- if [ ! -d res/ ]; then
18
- mkdir -p res;
19
- fi
20
-
21
- dico_train=data/${s}-${t}.0-5000.txt
22
- if [ ! -f "${dico_train}" ]; then
23
- DICO=$(basename -- "${dico_train}")
24
- wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
25
- fi
26
-
27
- dico_test=data/${s}-${t}.5000-6500.txt
28
- if [ ! -f "${dico_test}" ]; then
29
- DICO=$(basename -- "${dico_test}")
30
- wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
31
- fi
32
-
33
- src_emb=data/wiki.${s}.vec
34
- if [ ! -f "${src_emb}" ]; then
35
- EMB=$(basename -- "${src_emb}")
36
- wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
37
- fi
38
-
39
- tgt_emb=data/wiki.${t}.vec
40
- if [ ! -f "${tgt_emb}" ]; then
41
- EMB=$(basename -- "${tgt_emb}")
42
- wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
43
- fi
44
-
45
- output=res/wiki.${s}-${t}.vec
46
-
47
- python3 align.py --src_emb "${src_emb}" --tgt_emb "${tgt_emb}" \
48
- --dico_train "${dico_train}" --dico_test "${dico_test}" --output "${output}" \
49
- --lr 25 --niter 10
50
- python3 eval.py --src_emb "${output}" --tgt_emb "${tgt_emb}" \
51
- --dico_test "${dico_test}"