fasttext 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (478) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +6 -0
  3. data/README.md +20 -1
  4. data/lib/fasttext.rb +3 -0
  5. data/lib/fasttext/classifier.rb +12 -4
  6. data/lib/fasttext/vectorizer.rb +1 -1
  7. data/lib/fasttext/version.rb +1 -1
  8. metadata +4 -473
  9. data/lib/fasttext/ext.bundle +0 -0
  10. data/vendor/fastText/CMakeLists.txt +0 -68
  11. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  12. data/vendor/fastText/CONTRIBUTING.md +0 -32
  13. data/vendor/fastText/MANIFEST.in +0 -5
  14. data/vendor/fastText/Makefile +0 -63
  15. data/vendor/fastText/alignment/README.md +0 -53
  16. data/vendor/fastText/alignment/align.py +0 -145
  17. data/vendor/fastText/alignment/eval.py +0 -60
  18. data/vendor/fastText/alignment/example.sh +0 -51
  19. data/vendor/fastText/alignment/unsup_align.py +0 -109
  20. data/vendor/fastText/alignment/utils.py +0 -154
  21. data/vendor/fastText/classification-example.sh +0 -41
  22. data/vendor/fastText/classification-results.sh +0 -94
  23. data/vendor/fastText/crawl/README.md +0 -26
  24. data/vendor/fastText/crawl/dedup.cc +0 -51
  25. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  26. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  27. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  28. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  29. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  30. data/vendor/fastText/docs/api.md +0 -6
  31. data/vendor/fastText/docs/cheatsheet.md +0 -66
  32. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  33. data/vendor/fastText/docs/dataset.md +0 -6
  34. data/vendor/fastText/docs/english-vectors.md +0 -53
  35. data/vendor/fastText/docs/faqs.md +0 -63
  36. data/vendor/fastText/docs/language-identification.md +0 -47
  37. data/vendor/fastText/docs/options.md +0 -50
  38. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  39. data/vendor/fastText/docs/python-module.md +0 -314
  40. data/vendor/fastText/docs/references.md +0 -41
  41. data/vendor/fastText/docs/supervised-models.md +0 -54
  42. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  43. data/vendor/fastText/docs/support.md +0 -58
  44. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  45. data/vendor/fastText/eval.py +0 -95
  46. data/vendor/fastText/get-wikimedia.sh +0 -79
  47. data/vendor/fastText/python/README.md +0 -322
  48. data/vendor/fastText/python/README.rst +0 -406
  49. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  50. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  51. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  52. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  53. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  54. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  55. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  56. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  57. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  58. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  59. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  60. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  61. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  62. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  63. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  64. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  65. data/vendor/fastText/quantization-example.sh +0 -40
  66. data/vendor/fastText/runtests.py +0 -60
  67. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  68. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  69. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  70. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  71. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  72. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  73. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  74. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  75. data/vendor/fastText/setup.cfg +0 -2
  76. data/vendor/fastText/setup.py +0 -203
  77. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  78. data/vendor/fastText/website/README.md +0 -6
  79. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  80. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  81. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  82. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  83. data/vendor/fastText/website/core/Footer.js +0 -127
  84. data/vendor/fastText/website/package.json +0 -12
  85. data/vendor/fastText/website/pages/en/index.js +0 -286
  86. data/vendor/fastText/website/sidebars.json +0 -18
  87. data/vendor/fastText/website/siteConfig.js +0 -102
  88. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  89. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  90. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  91. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  92. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  93. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  94. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  95. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  96. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  97. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  98. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  99. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  100. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  101. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  102. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  103. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  104. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  105. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  106. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  107. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  108. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  109. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  110. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  111. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  112. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  113. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  114. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  115. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  116. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  121. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  122. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  123. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  124. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  125. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  126. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  127. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  128. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  129. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  130. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  131. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  132. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  133. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  134. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  135. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  136. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  137. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  138. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  139. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  140. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  141. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  142. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  143. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  144. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  145. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  146. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  147. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  148. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  149. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  150. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  151. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  152. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  153. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  154. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  155. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  156. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  157. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  158. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  159. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  160. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  161. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  162. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  163. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  164. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  165. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  166. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  167. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  168. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  169. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  170. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  171. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  172. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  173. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  174. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  175. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  176. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  177. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  178. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  179. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  180. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  181. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  182. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  183. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  184. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  185. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  186. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  187. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  188. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  189. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  190. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  191. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  192. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  193. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  194. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  195. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  196. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  197. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  198. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  199. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  200. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  201. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  202. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  203. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  204. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  205. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  206. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  207. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  208. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  209. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  210. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  211. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  212. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  213. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  214. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  215. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  216. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  217. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  218. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  219. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  220. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  221. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  222. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  223. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  224. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  225. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  226. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  227. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  228. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  229. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  230. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  232. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  233. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  237. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  241. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  243. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  245. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  247. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  249. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  251. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  253. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  255. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  257. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  259. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  261. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  263. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  265. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  267. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  269. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  271. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  273. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  275. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  277. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  279. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  280. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  281. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  282. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  283. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  284. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  285. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  286. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  287. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  288. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  289. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  290. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  291. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  292. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  293. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  294. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  295. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  296. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  297. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  298. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  299. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  300. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  302. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  304. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  308. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  310. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  312. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  314. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  316. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  318. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  322. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  324. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  326. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  332. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  338. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  342. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  372. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  373. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  374. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  375. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  376. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  377. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  378. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  379. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  380. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  381. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  382. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  384. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  386. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  388. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  390. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  392. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  393. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  394. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  395. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  396. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  397. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  398. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  399. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  400. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  401. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  402. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  404. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  426. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  427. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  428. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  429. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  430. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  431. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  432. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  433. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  434. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  435. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  436. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  437. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  438. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  439. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  440. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  441. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  442. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  443. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  444. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  445. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  446. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  447. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  448. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  449. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  450. data/vendor/fastText/website/static/fasttext.css +0 -48
  451. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  452. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  453. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  454. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  455. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  456. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  457. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  458. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  459. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  460. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  461. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  462. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  463. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  464. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  465. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  466. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  467. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  468. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  469. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  470. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  471. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  472. data/vendor/fastText/website/static/img/model-black.png +0 -0
  473. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  474. data/vendor/fastText/website/static/img/model-red.png +0 -0
  475. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  476. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  477. data/vendor/fastText/wikifil.pl +0 -57
  478. data/vendor/fastText/word-vector-example.sh +0 -39
Binary file
@@ -1,68 +0,0 @@
1
- #
2
- # Copyright (c) 2016-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the MIT license found in the
6
- # LICENSE file in the root directory of this source tree.
7
- #
8
-
9
- cmake_minimum_required(VERSION 2.8.9)
10
- project(fasttext)
11
-
12
- # The version number.
13
- set (fasttext_VERSION_MAJOR 0)
14
- set (fasttext_VERSION_MINOR 1)
15
-
16
- include_directories(fasttext)
17
-
18
- set(CMAKE_CXX_FLAGS " -pthread -std=c++11 -funroll-loops -O3 -march=native")
19
-
20
- set(HEADER_FILES
21
- src/args.h
22
- src/densematrix.h
23
- src/dictionary.h
24
- src/fasttext.h
25
- src/loss.h
26
- src/matrix.h
27
- src/meter.h
28
- src/model.h
29
- src/productquantizer.h
30
- src/quantmatrix.h
31
- src/real.h
32
- src/utils.h
33
- src/vector.h)
34
-
35
- set(SOURCE_FILES
36
- src/args.cc
37
- src/densematrix.cc
38
- src/dictionary.cc
39
- src/fasttext.cc
40
- src/loss.cc
41
- src/main.cc
42
- src/matrix.cc
43
- src/meter.cc
44
- src/model.cc
45
- src/productquantizer.cc
46
- src/quantmatrix.cc
47
- src/utils.cc
48
- src/vector.cc)
49
-
50
- add_library(fasttext-shared SHARED ${SOURCE_FILES} ${HEADER_FILES})
51
- add_library(fasttext-static STATIC ${SOURCE_FILES} ${HEADER_FILES})
52
- add_library(fasttext-static_pic STATIC ${SOURCE_FILES} ${HEADER_FILES})
53
- set_target_properties(fasttext-shared PROPERTIES OUTPUT_NAME fasttext)
54
- set_target_properties(fasttext-static PROPERTIES OUTPUT_NAME fasttext)
55
- set_target_properties(fasttext-static_pic PROPERTIES OUTPUT_NAME fasttext_pic
56
- POSITION_INDEPENDENT_CODE True)
57
- add_executable(fasttext-bin src/main.cc)
58
- target_link_libraries(fasttext-bin pthread fasttext-static)
59
- set_target_properties(fasttext-bin PROPERTIES PUBLIC_HEADER "${HEADER_FILES}" OUTPUT_NAME fasttext)
60
- install (TARGETS fasttext-shared
61
- LIBRARY DESTINATION lib)
62
- install (TARGETS fasttext-static
63
- ARCHIVE DESTINATION lib)
64
- install (TARGETS fasttext-static_pic
65
- ARCHIVE DESTINATION lib)
66
- install (TARGETS fasttext-bin
67
- RUNTIME DESTINATION bin
68
- PUBLIC_HEADER DESTINATION include/fasttext)
@@ -1,2 +0,0 @@
1
- # Code of Conduct
2
- Facebook has adopted a Code of Conduct that we expect project participants to adhere to. Please [read the full text](https://code.fb.com/codeofconduct) so that you can understand what actions will and will not be tolerated.
@@ -1,32 +0,0 @@
1
- # Contributing to fastText
2
- We want to make contributing to this project as easy and transparent as possible.
3
-
4
- ## Issues
5
- We use GitHub issues to track public bugs. Please ensure your description is clear and has sufficient instructions to be able to reproduce the issue.
6
-
7
- ### Reproducing issues
8
- Please make sure that the issue you mention is not a result of one of the existing third-party libraries. For example, please do not post an issue if you encountered an error within a third-party Python library. We can only help you with errors which can be directly reproduced either with our C++ code or the corresponding Python bindings. If you do find an error, please post detailed steps to reproduce it. If we can't reproduce your error, we can't help you fix it.
9
-
10
- ## Pull Requests
11
- Please post an Issue before submitting a pull request. This might save you some time as it is possible we can't support your contribution, although we try our best to accommodate your (planned) work and highly appreciate your time. Generally, it is best to have a pull request emerge from an issue rather than the other way around.
12
-
13
- To create a pull request:
14
-
15
- 1. Fork the repo and create your branch from `master`.
16
- 2. If you've added code that should be tested, add tests.
17
- 3. If you've changed APIs, update the documentation.
18
- 4. Ensure the test suite passes.
19
- 5. Make sure your code lints.
20
- 6. If you haven't already, complete the Contributor License Agreement ("CLA").
21
-
22
- ## Tests
23
- First, you will need to make sure you have the required data. For that, please have a look at the fetch_test_data.sh script under the tests directory. Next, run the tests using the runtests.py script, passing a path to the directory containing the datasets.
24
-
25
- ## Contributor License Agreement ("CLA")
26
- In order to accept your pull request, we need you to submit a CLA. You only need
27
- to do this once to work on any of Facebook's open source projects.
28
-
29
- Complete your CLA here: <https://code.facebook.com/cla>
30
-
31
- ## License
32
- By contributing to fastText, you agree that your contributions will be licensed under its MIT license.
@@ -1,5 +0,0 @@
1
- include LICENSE
2
- include PATENTS
3
-
4
- recursive-include python *.md *.rst
5
- recursive-include src *.h
@@ -1,63 +0,0 @@
1
- #
2
- # Copyright (c) 2016-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the MIT license found in the
6
- # LICENSE file in the root directory of this source tree.
7
- #
8
-
9
- CXX = c++
10
- CXXFLAGS = -pthread -std=c++0x -march=native
11
- OBJS = args.o matrix.o dictionary.o loss.o productquantizer.o densematrix.o quantmatrix.o vector.o model.o utils.o meter.o fasttext.o
12
- INCLUDES = -I.
13
-
14
- opt: CXXFLAGS += -O3 -funroll-loops -DNDEBUG
15
- opt: fasttext
16
-
17
- coverage: CXXFLAGS += -O0 -fno-inline -fprofile-arcs --coverage
18
- coverage: fasttext
19
-
20
- debug: CXXFLAGS += -g -O0 -fno-inline
21
- debug: fasttext
22
-
23
- args.o: src/args.cc src/args.h
24
- $(CXX) $(CXXFLAGS) -c src/args.cc
25
-
26
- matrix.o: src/matrix.cc src/matrix.h
27
- $(CXX) $(CXXFLAGS) -c src/matrix.cc
28
-
29
- dictionary.o: src/dictionary.cc src/dictionary.h src/args.h
30
- $(CXX) $(CXXFLAGS) -c src/dictionary.cc
31
-
32
- loss.o: src/loss.cc src/loss.h src/matrix.h src/real.h
33
- $(CXX) $(CXXFLAGS) -c src/loss.cc
34
-
35
- productquantizer.o: src/productquantizer.cc src/productquantizer.h src/utils.h
36
- $(CXX) $(CXXFLAGS) -c src/productquantizer.cc
37
-
38
- densematrix.o: src/densematrix.cc src/densematrix.h src/utils.h src/matrix.h
39
- $(CXX) $(CXXFLAGS) -c src/densematrix.cc
40
-
41
- quantmatrix.o: src/quantmatrix.cc src/quantmatrix.h src/utils.h src/matrix.h
42
- $(CXX) $(CXXFLAGS) -c src/quantmatrix.cc
43
-
44
- vector.o: src/vector.cc src/vector.h src/utils.h
45
- $(CXX) $(CXXFLAGS) -c src/vector.cc
46
-
47
- model.o: src/model.cc src/model.h src/args.h
48
- $(CXX) $(CXXFLAGS) -c src/model.cc
49
-
50
- utils.o: src/utils.cc src/utils.h
51
- $(CXX) $(CXXFLAGS) -c src/utils.cc
52
-
53
- meter.o: src/meter.cc src/meter.h
54
- $(CXX) $(CXXFLAGS) -c src/meter.cc
55
-
56
- fasttext.o: src/fasttext.cc src/*.h
57
- $(CXX) $(CXXFLAGS) -c src/fasttext.cc
58
-
59
- fasttext: $(OBJS) src/fasttext.cc
60
- $(CXX) $(CXXFLAGS) $(OBJS) src/main.cc -o fasttext
61
-
62
- clean:
63
- rm -rf *.o *.gcno *.gcda fasttext
@@ -1,53 +0,0 @@
1
- ## Alignment of Word Embeddings
2
-
3
- This directory provides code for learning alignments between word embeddings in different languages.
4
-
5
- The code is in Python 3 and requires [NumPy](http://www.numpy.org/).
6
-
7
- The script `example.sh` shows how to use this code to learn and evaluate a bilingual alignment of word embeddings.
8
-
9
- The word embeddings used in [1] can be found on the [fastText project page](https://fasttext.cc) and the supervised bilingual lexicons on the [MUSE project page](https://github.com/facebookresearch/MUSE).
10
-
11
- ### Supervised alignment
12
-
13
- The script `align.py` aligns word embeddings from two languages using a bilingual lexicon as supervision.
14
- The details of this approach can be found in [1].
15
-
16
- ### Unsupervised alignment
17
-
18
- The script `unsup_align.py` aligns word embeddings from two languages without requiring any supervision.
19
- The details of this approach can be found in [2].
20
-
21
- In addition to NumPy, the unsupervised method requires the [Python Optimal Transport](https://pot.readthedocs.io/en/stable/) toolbox.
22
-
23
- ### Download
24
-
25
- Wikipedia fastText embeddings aligned with our method can be found [here](https://fasttext.cc/docs/en/aligned-vectors.html).
26
-
27
- ### References
28
-
29
- If you use the supervised alignment method, please cite:
30
-
31
- [1] A. Joulin, P. Bojanowski, T. Mikolov, H. Jegou, E. Grave, [*Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion*](https://arxiv.org/abs/1804.07745)
32
-
33
- ```
34
- @InProceedings{joulin2018loss,
35
- title={Loss in Translation: Learning Bilingual Word Mapping with a Retrieval Criterion},
36
- author={Joulin, Armand and Bojanowski, Piotr and Mikolov, Tomas and J\'egou, Herv\'e and Grave, Edouard},
37
- year={2018},
38
- booktitle={Proceedings of the 2018 Conference on Empirical Methods in Natural Language Processing},
39
- }
40
- ```
41
-
42
- If you use the unsupervised alignment method, please cite:
43
-
44
- [2] E. Grave, A. Joulin, Q. Berthet, [*Unsupervised Alignment of Embeddings with Wasserstein Procrustes*](https://arxiv.org/abs/1805.11222)
45
-
46
- ```
47
- @article{grave2018unsupervised,
48
- title={Unsupervised Alignment of Embeddings with Wasserstein Procrustes},
49
- author={Grave, Edouard and Joulin, Armand and Berthet, Quentin},
50
- journal={arXiv preprint arXiv:1805.11222},
51
- year={2018}
52
- }
53
- ```
@@ -1,145 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- #
4
- # Copyright (c) 2018-present, Facebook, Inc.
5
- # All rights reserved.
6
- #
7
- # This source code is licensed under the license found in the
8
- # LICENSE file in the root directory of this source tree.
9
-
10
- import numpy as np
11
- import argparse
12
- from utils import *
13
- import sys
14
-
15
- parser = argparse.ArgumentParser(description='RCSLS for supervised word alignment')
16
-
17
- parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
18
- parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
19
- parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
20
-
21
- parser.add_argument("--dico_train", type=str, default='', help="train dictionary")
22
- parser.add_argument("--dico_test", type=str, default='', help="validation dictionary")
23
-
24
- parser.add_argument("--output", type=str, default='', help="where to save aligned embeddings")
25
-
26
- parser.add_argument("--knn", type=int, default=10, help="number of nearest neighbors in RCSL/CSLS")
27
- parser.add_argument("--maxneg", type=int, default=200000, help="Maximum number of negatives for the Extended RCSLS")
28
- parser.add_argument("--maxsup", type=int, default=-1, help="Maximum number of training examples")
29
- parser.add_argument("--maxload", type=int, default=200000, help="Maximum number of loaded vectors")
30
-
31
- parser.add_argument("--model", type=str, default="none", help="Set of constraints: spectral or none")
32
- parser.add_argument("--reg", type=float, default=0.0 , help='regularization parameters')
33
-
34
- parser.add_argument("--lr", type=float, default=1.0, help='learning rate')
35
- parser.add_argument("--niter", type=int, default=10, help='number of iterations')
36
- parser.add_argument('--sgd', action='store_true', help='use sgd')
37
- parser.add_argument("--batchsize", type=int, default=10000, help="batch size for sgd")
38
-
39
- params = parser.parse_args()
40
-
41
- ###### SPECIFIC FUNCTIONS ######
42
- # functions specific to RCSLS
43
- # the rest of the functions are in utils.py
44
-
45
- def getknn(sc, x, y, k=10):
46
- sidx = np.argpartition(sc, -k, axis=1)[:, -k:]
47
- ytopk = y[sidx.flatten(), :]
48
- ytopk = ytopk.reshape(sidx.shape[0], sidx.shape[1], y.shape[1])
49
- f = np.sum(sc[np.arange(sc.shape[0])[:, None], sidx])
50
- df = np.dot(ytopk.sum(1).T, x)
51
- return f / k, df / k
52
-
53
-
54
- def rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, knn=10):
55
- X_trans = np.dot(X_src, R.T)
56
- f = 2 * np.sum(X_trans * Y_tgt)
57
- df = 2 * np.dot(Y_tgt.T, X_src)
58
- fk0, dfk0 = getknn(np.dot(X_trans, Z_tgt.T), X_src, Z_tgt, knn)
59
- fk1, dfk1 = getknn(np.dot(np.dot(Z_src, R.T), Y_tgt.T).T, Y_tgt, Z_src, knn)
60
- f = f - fk0 -fk1
61
- df = df - dfk0 - dfk1.T
62
- return -f / X_src.shape[0], -df / X_src.shape[0]
63
-
64
-
65
- def proj_spectral(R):
66
- U, s, V = np.linalg.svd(R)
67
- s[s > 1] = 1
68
- s[s < 0] = 0
69
- return np.dot(U, np.dot(np.diag(s), V))
70
-
71
-
72
- ###### MAIN ######
73
-
74
- # load word embeddings
75
- words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
76
- words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
77
-
78
- # load validation bilingual lexicon
79
- src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
80
-
81
- # word --> vector indices
82
- idx_src = idx(words_src)
83
- idx_tgt = idx(words_tgt)
84
-
85
- # load train bilingual lexicon
86
- pairs = load_pairs(params.dico_train, idx_src, idx_tgt)
87
- if params.maxsup > 0 and params.maxsup < len(pairs):
88
- pairs = pairs[:params.maxsup]
89
-
90
- # selecting training vector pairs
91
- X_src, Y_tgt = select_vectors_from_pairs(x_src, x_tgt, pairs)
92
-
93
- # adding negatives for RCSLS
94
- Z_src = x_src[:params.maxneg, :]
95
- Z_tgt = x_tgt[:params.maxneg, :]
96
-
97
- # initialization:
98
- R = procrustes(X_src, Y_tgt)
99
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
100
- print("[init -- Procrustes] NN: %.4f"%(nnacc))
101
- sys.stdout.flush()
102
-
103
- # optimization
104
- fold, Rold = 0, []
105
- niter, lr = params.niter, params.lr
106
-
107
- for it in range(0, niter + 1):
108
- if lr < 1e-4:
109
- break
110
-
111
- if params.sgd:
112
- indices = np.random.choice(X_src.shape[0], size=params.batchsize, replace=False)
113
- f, df = rcsls(X_src[indices, :], Y_tgt[indices, :], Z_src, Z_tgt, R, params.knn)
114
- else:
115
- f, df = rcsls(X_src, Y_tgt, Z_src, Z_tgt, R, params.knn)
116
-
117
- if params.reg > 0:
118
- R *= (1 - lr * params.reg)
119
- R -= lr * df
120
- if params.model == "spectral":
121
- R = proj_spectral(R)
122
-
123
- print("[it=%d] f = %.4f" % (it, f))
124
- sys.stdout.flush()
125
-
126
- if f > fold and it > 0 and not params.sgd:
127
- lr /= 2
128
- f, R = fold, Rold
129
-
130
- fold, Rold = f, R
131
-
132
- if (it > 0 and it % 10 == 0) or it == niter:
133
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
134
- print("[it=%d] NN = %.4f - Coverage = %.4f" % (it, nnacc, len(src2tgt) / lexicon_size))
135
-
136
- nnacc = compute_nn_accuracy(np.dot(x_src, R.T), x_tgt, src2tgt, lexicon_size=lexicon_size)
137
- print("[final] NN = %.4f - Coverage = %.4f" % (nnacc, len(src2tgt) / lexicon_size))
138
-
139
- if params.output != "":
140
- print("Saving all aligned vectors at %s" % params.output)
141
- words_full, x_full = load_vectors(params.src_emb, maxload=-1, center=params.center, verbose=False)
142
- x = np.dot(x_full, R.T)
143
- x /= np.linalg.norm(x, axis=1)[:, np.newaxis] + 1e-8
144
- save_vectors(params.output, x, words_full)
145
- save_matrix(params.output + "-mat", R)
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python3
2
- # -*- coding: utf-8 -*-
3
- #
4
- # Copyright (c) 2018-present, Facebook, Inc.
5
- # All rights reserved.
6
- #
7
- # This source code is licensed under the license found in the
8
- # LICENSE file in the root directory of this source tree.
9
-
10
- import io
11
- import numpy as np
12
- import argparse
13
- from utils import *
14
-
15
- parser = argparse.ArgumentParser(description='Evaluation of word alignment')
16
- parser.add_argument("--src_emb", type=str, default='', help="Load source embeddings")
17
- parser.add_argument("--tgt_emb", type=str, default='', help="Load target embeddings")
18
- parser.add_argument('--center', action='store_true', help='whether to center embeddings or not')
19
- parser.add_argument("--src_mat", type=str, default='', help="Load source alignment matrix. If none given, the aligment matrix is the identity.")
20
- parser.add_argument("--tgt_mat", type=str, default='', help="Load target alignment matrix. If none given, the aligment matrix is the identity.")
21
- parser.add_argument("--dico_test", type=str, default='', help="test dictionary")
22
- parser.add_argument("--maxload", type=int, default=200000)
23
- parser.add_argument("--nomatch", action='store_true', help="no exact match in lexicon")
24
- params = parser.parse_args()
25
-
26
-
27
- ###### SPECIFIC FUNCTIONS ######
28
- # function specific to evaluation
29
- # the rest of the functions are in utils.py
30
-
31
- def load_transform(fname, d1=300, d2=300):
32
- fin = io.open(fname, 'r', encoding='utf-8', newline='\n', errors='ignore')
33
- R = np.zeros([d1, d2])
34
- for i, line in enumerate(fin):
35
- tokens = line.split(' ')
36
- R[i, :] = np.array(tokens[0:d2], dtype=float)
37
- return R
38
-
39
-
40
- ###### MAIN ######
41
-
42
- print("Evaluation of alignment on %s" % params.dico_test)
43
- if params.nomatch:
44
- print("running without exact string matches")
45
-
46
- words_tgt, x_tgt = load_vectors(params.tgt_emb, maxload=params.maxload, center=params.center)
47
- words_src, x_src = load_vectors(params.src_emb, maxload=params.maxload, center=params.center)
48
-
49
- if params.tgt_mat != "":
50
- R_tgt = load_transform(params.tgt_mat)
51
- x_tgt = np.dot(x_tgt, R_tgt)
52
- if params.src_mat != "":
53
- R_src = load_transform(params.src_mat)
54
- x_src = np.dot(x_src, R_src)
55
-
56
- src2tgt, lexicon_size = load_lexicon(params.dico_test, words_src, words_tgt)
57
-
58
- nnacc = compute_nn_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
59
- cslsproc = compute_csls_accuracy(x_src, x_tgt, src2tgt, lexicon_size=lexicon_size)
60
- print("NN = %.4f - CSLS = %.4f - Coverage = %.4f" % (nnacc, cslsproc, len(src2tgt) / lexicon_size))
@@ -1,51 +0,0 @@
1
- #!/bin/usr/env sh
2
- # Copyright (c) 2018-present, Facebook, Inc.
3
- # All rights reserved.
4
- #
5
- # This source code is licensed under the license found in the
6
- # LICENSE file in the root directory of this source tree.
7
-
8
- set -e
9
- s=${1:-en}
10
- t=${2:-es}
11
- echo "Example based on the ${s}->${t} alignment"
12
-
13
- if [ ! -d data/ ]; then
14
- mkdir -p data;
15
- fi
16
-
17
- if [ ! -d res/ ]; then
18
- mkdir -p res;
19
- fi
20
-
21
- dico_train=data/${s}-${t}.0-5000.txt
22
- if [ ! -f "${dico_train}" ]; then
23
- DICO=$(basename -- "${dico_train}")
24
- wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
25
- fi
26
-
27
- dico_test=data/${s}-${t}.5000-6500.txt
28
- if [ ! -f "${dico_test}" ]; then
29
- DICO=$(basename -- "${dico_test}")
30
- wget -c "https://dl.fbaipublicfiles.com/arrival/dictionaries/${DICO}" -P data/
31
- fi
32
-
33
- src_emb=data/wiki.${s}.vec
34
- if [ ! -f "${src_emb}" ]; then
35
- EMB=$(basename -- "${src_emb}")
36
- wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
37
- fi
38
-
39
- tgt_emb=data/wiki.${t}.vec
40
- if [ ! -f "${tgt_emb}" ]; then
41
- EMB=$(basename -- "${tgt_emb}")
42
- wget -c "https://dl.fbaipublicfiles.com/fasttext/vectors-wiki/${EMB}" -P data/
43
- fi
44
-
45
- output=res/wiki.${s}-${t}.vec
46
-
47
- python3 align.py --src_emb "${src_emb}" --tgt_emb "${tgt_emb}" \
48
- --dico_train "${dico_train}" --dico_test "${dico_test}" --output "${output}" \
49
- --lr 25 --niter 10
50
- python3 eval.py --src_emb "${output}" --tgt_emb "${tgt_emb}" \
51
- --dico_test "${dico_test}"