fasttext 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (498)
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +26 -1
  3. data/LICENSE.txt +18 -18
  4. data/README.md +39 -12
  5. data/ext/fasttext/ext.cpp +108 -101
  6. data/ext/fasttext/extconf.rb +7 -9
  7. data/lib/fasttext.rb +3 -0
  8. data/lib/fasttext/classifier.rb +25 -7
  9. data/lib/fasttext/vectorizer.rb +7 -2
  10. data/lib/fasttext/version.rb +1 -1
  11. data/vendor/fastText/README.md +3 -3
  12. data/vendor/fastText/src/args.cc +179 -6
  13. data/vendor/fastText/src/args.h +29 -1
  14. data/vendor/fastText/src/autotune.cc +477 -0
  15. data/vendor/fastText/src/autotune.h +89 -0
  16. data/vendor/fastText/src/densematrix.cc +27 -7
  17. data/vendor/fastText/src/densematrix.h +10 -2
  18. data/vendor/fastText/src/fasttext.cc +125 -114
  19. data/vendor/fastText/src/fasttext.h +31 -52
  20. data/vendor/fastText/src/main.cc +32 -13
  21. data/vendor/fastText/src/meter.cc +148 -2
  22. data/vendor/fastText/src/meter.h +24 -2
  23. data/vendor/fastText/src/model.cc +0 -1
  24. data/vendor/fastText/src/real.h +0 -1
  25. data/vendor/fastText/src/utils.cc +25 -0
  26. data/vendor/fastText/src/utils.h +29 -0
  27. data/vendor/fastText/src/vector.cc +0 -1
  28. metadata +16 -539
  29. data/lib/fasttext/ext.bundle +0 -0
  30. data/vendor/fastText/CMakeLists.txt +0 -68
  31. data/vendor/fastText/CODE_OF_CONDUCT.md +0 -2
  32. data/vendor/fastText/CONTRIBUTING.md +0 -32
  33. data/vendor/fastText/MANIFEST.in +0 -5
  34. data/vendor/fastText/Makefile +0 -63
  35. data/vendor/fastText/alignment/README.md +0 -53
  36. data/vendor/fastText/alignment/align.py +0 -145
  37. data/vendor/fastText/alignment/eval.py +0 -60
  38. data/vendor/fastText/alignment/example.sh +0 -51
  39. data/vendor/fastText/alignment/unsup_align.py +0 -109
  40. data/vendor/fastText/alignment/utils.py +0 -154
  41. data/vendor/fastText/classification-example.sh +0 -41
  42. data/vendor/fastText/classification-results.sh +0 -94
  43. data/vendor/fastText/crawl/README.md +0 -26
  44. data/vendor/fastText/crawl/dedup.cc +0 -51
  45. data/vendor/fastText/crawl/download_crawl.sh +0 -57
  46. data/vendor/fastText/crawl/filter_dedup.sh +0 -13
  47. data/vendor/fastText/crawl/filter_utf8.cc +0 -105
  48. data/vendor/fastText/crawl/process_wet_file.sh +0 -30
  49. data/vendor/fastText/docs/aligned-vectors.md +0 -64
  50. data/vendor/fastText/docs/api.md +0 -6
  51. data/vendor/fastText/docs/cheatsheet.md +0 -66
  52. data/vendor/fastText/docs/crawl-vectors.md +0 -125
  53. data/vendor/fastText/docs/dataset.md +0 -6
  54. data/vendor/fastText/docs/english-vectors.md +0 -53
  55. data/vendor/fastText/docs/faqs.md +0 -63
  56. data/vendor/fastText/docs/language-identification.md +0 -47
  57. data/vendor/fastText/docs/options.md +0 -50
  58. data/vendor/fastText/docs/pretrained-vectors.md +0 -142
  59. data/vendor/fastText/docs/python-module.md +0 -314
  60. data/vendor/fastText/docs/references.md +0 -41
  61. data/vendor/fastText/docs/supervised-models.md +0 -54
  62. data/vendor/fastText/docs/supervised-tutorial.md +0 -349
  63. data/vendor/fastText/docs/support.md +0 -58
  64. data/vendor/fastText/docs/unsupervised-tutorials.md +0 -309
  65. data/vendor/fastText/eval.py +0 -95
  66. data/vendor/fastText/get-wikimedia.sh +0 -79
  67. data/vendor/fastText/python/README.md +0 -322
  68. data/vendor/fastText/python/README.rst +0 -406
  69. data/vendor/fastText/python/benchmarks/README.rst +0 -3
  70. data/vendor/fastText/python/benchmarks/get_word_vector.py +0 -49
  71. data/vendor/fastText/python/doc/examples/FastTextEmbeddingBag.py +0 -81
  72. data/vendor/fastText/python/doc/examples/bin_to_vec.py +0 -41
  73. data/vendor/fastText/python/doc/examples/compute_accuracy.py +0 -163
  74. data/vendor/fastText/python/doc/examples/get_vocab.py +0 -48
  75. data/vendor/fastText/python/doc/examples/train_supervised.py +0 -42
  76. data/vendor/fastText/python/doc/examples/train_unsupervised.py +0 -56
  77. data/vendor/fastText/python/fasttext_module/fasttext/FastText.py +0 -468
  78. data/vendor/fastText/python/fasttext_module/fasttext/__init__.py +0 -22
  79. data/vendor/fastText/python/fasttext_module/fasttext/pybind/fasttext_pybind.cc +0 -388
  80. data/vendor/fastText/python/fasttext_module/fasttext/tests/__init__.py +0 -14
  81. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_configurations.py +0 -239
  82. data/vendor/fastText/python/fasttext_module/fasttext/tests/test_script.py +0 -629
  83. data/vendor/fastText/python/fasttext_module/fasttext/util/__init__.py +0 -13
  84. data/vendor/fastText/python/fasttext_module/fasttext/util/util.py +0 -60
  85. data/vendor/fastText/quantization-example.sh +0 -40
  86. data/vendor/fastText/runtests.py +0 -60
  87. data/vendor/fastText/scripts/kbcompletion/README.md +0 -19
  88. data/vendor/fastText/scripts/kbcompletion/data.sh +0 -69
  89. data/vendor/fastText/scripts/kbcompletion/eval.cpp +0 -108
  90. data/vendor/fastText/scripts/kbcompletion/fb15k.sh +0 -49
  91. data/vendor/fastText/scripts/kbcompletion/fb15k237.sh +0 -45
  92. data/vendor/fastText/scripts/kbcompletion/svo.sh +0 -38
  93. data/vendor/fastText/scripts/kbcompletion/wn18.sh +0 -49
  94. data/vendor/fastText/scripts/quantization/quantization-results.sh +0 -43
  95. data/vendor/fastText/setup.cfg +0 -2
  96. data/vendor/fastText/setup.py +0 -203
  97. data/vendor/fastText/tests/fetch_test_data.sh +0 -202
  98. data/vendor/fastText/website/README.md +0 -6
  99. data/vendor/fastText/website/blog/2016-08-18-blog-post.md +0 -42
  100. data/vendor/fastText/website/blog/2017-05-02-blog-post.md +0 -60
  101. data/vendor/fastText/website/blog/2017-10-02-blog-post.md +0 -90
  102. data/vendor/fastText/website/blog/2019-06-25-blog-post.md +0 -168
  103. data/vendor/fastText/website/core/Footer.js +0 -127
  104. data/vendor/fastText/website/package.json +0 -12
  105. data/vendor/fastText/website/pages/en/index.js +0 -286
  106. data/vendor/fastText/website/sidebars.json +0 -18
  107. data/vendor/fastText/website/siteConfig.js +0 -102
  108. data/vendor/fastText/website/static/docs/en/html/annotated.html +0 -115
  109. data/vendor/fastText/website/static/docs/en/html/annotated_dup.js +0 -4
  110. data/vendor/fastText/website/static/docs/en/html/args_8cc.html +0 -113
  111. data/vendor/fastText/website/static/docs/en/html/args_8h.html +0 -134
  112. data/vendor/fastText/website/static/docs/en/html/args_8h.js +0 -14
  113. data/vendor/fastText/website/static/docs/en/html/args_8h_source.html +0 -139
  114. data/vendor/fastText/website/static/docs/en/html/bc_s.png +0 -0
  115. data/vendor/fastText/website/static/docs/en/html/bdwn.png +0 -0
  116. data/vendor/fastText/website/static/docs/en/html/classes.html +0 -121
  117. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args-members.html +0 -140
  118. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.html +0 -753
  119. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Args.js +0 -40
  120. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary-members.html +0 -148
  121. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.html +0 -1266
  122. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Dictionary.js +0 -43
  123. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText-members.html +0 -145
  124. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.html +0 -1149
  125. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1FastText.js +0 -45
  126. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix-members.html +0 -123
  127. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.html +0 -610
  128. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Matrix.js +0 -23
  129. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model-members.html +0 -150
  130. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.html +0 -1400
  131. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Model.js +0 -48
  132. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer-members.html +0 -131
  133. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.html +0 -950
  134. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1ProductQuantizer.js +0 -31
  135. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix-members.html +0 -122
  136. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.html +0 -565
  137. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1QMatrix.js +0 -22
  138. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector-members.html +0 -121
  139. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.html +0 -542
  140. data/vendor/fastText/website/static/docs/en/html/classfasttext_1_1Vector.js +0 -21
  141. data/vendor/fastText/website/static/docs/en/html/closed.png +0 -0
  142. data/vendor/fastText/website/static/docs/en/html/dictionary_8cc.html +0 -116
  143. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.html +0 -142
  144. data/vendor/fastText/website/static/docs/en/html/dictionary_8h.js +0 -10
  145. data/vendor/fastText/website/static/docs/en/html/dictionary_8h_source.html +0 -127
  146. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.html +0 -145
  147. data/vendor/fastText/website/static/docs/en/html/dir_68267d1309a1af8e8297ef4c3efbcdba.js +0 -29
  148. data/vendor/fastText/website/static/docs/en/html/doc.png +0 -0
  149. data/vendor/fastText/website/static/docs/en/html/doxygen.css +0 -1596
  150. data/vendor/fastText/website/static/docs/en/html/doxygen.png +0 -0
  151. data/vendor/fastText/website/static/docs/en/html/dynsections.js +0 -97
  152. data/vendor/fastText/website/static/docs/en/html/fasttext_8cc.html +0 -119
  153. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.html +0 -168
  154. data/vendor/fastText/website/static/docs/en/html/fasttext_8h.js +0 -6
  155. data/vendor/fastText/website/static/docs/en/html/fasttext_8h_source.html +0 -155
  156. data/vendor/fastText/website/static/docs/en/html/favicon.png +0 -0
  157. data/vendor/fastText/website/static/docs/en/html/files.html +0 -125
  158. data/vendor/fastText/website/static/docs/en/html/files.js +0 -4
  159. data/vendor/fastText/website/static/docs/en/html/folderclosed.png +0 -0
  160. data/vendor/fastText/website/static/docs/en/html/folderopen.png +0 -0
  161. data/vendor/fastText/website/static/docs/en/html/functions.html +0 -139
  162. data/vendor/fastText/website/static/docs/en/html/functions_0x7e.html +0 -112
  163. data/vendor/fastText/website/static/docs/en/html/functions_b.html +0 -115
  164. data/vendor/fastText/website/static/docs/en/html/functions_c.html +0 -143
  165. data/vendor/fastText/website/static/docs/en/html/functions_d.html +0 -135
  166. data/vendor/fastText/website/static/docs/en/html/functions_dup.js +0 -27
  167. data/vendor/fastText/website/static/docs/en/html/functions_e.html +0 -115
  168. data/vendor/fastText/website/static/docs/en/html/functions_f.html +0 -112
  169. data/vendor/fastText/website/static/docs/en/html/functions_func.html +0 -563
  170. data/vendor/fastText/website/static/docs/en/html/functions_g.html +0 -145
  171. data/vendor/fastText/website/static/docs/en/html/functions_h.html +0 -112
  172. data/vendor/fastText/website/static/docs/en/html/functions_i.html +0 -121
  173. data/vendor/fastText/website/static/docs/en/html/functions_k.html +0 -106
  174. data/vendor/fastText/website/static/docs/en/html/functions_l.html +0 -140
  175. data/vendor/fastText/website/static/docs/en/html/functions_m.html +0 -153
  176. data/vendor/fastText/website/static/docs/en/html/functions_n.html +0 -164
  177. data/vendor/fastText/website/static/docs/en/html/functions_o.html +0 -116
  178. data/vendor/fastText/website/static/docs/en/html/functions_p.html +0 -161
  179. data/vendor/fastText/website/static/docs/en/html/functions_q.html +0 -135
  180. data/vendor/fastText/website/static/docs/en/html/functions_r.html +0 -116
  181. data/vendor/fastText/website/static/docs/en/html/functions_s.html +0 -159
  182. data/vendor/fastText/website/static/docs/en/html/functions_t.html +0 -138
  183. data/vendor/fastText/website/static/docs/en/html/functions_u.html +0 -106
  184. data/vendor/fastText/website/static/docs/en/html/functions_v.html +0 -106
  185. data/vendor/fastText/website/static/docs/en/html/functions_vars.html +0 -486
  186. data/vendor/fastText/website/static/docs/en/html/functions_w.html +0 -124
  187. data/vendor/fastText/website/static/docs/en/html/functions_z.html +0 -104
  188. data/vendor/fastText/website/static/docs/en/html/globals.html +0 -170
  189. data/vendor/fastText/website/static/docs/en/html/globals_defs.html +0 -113
  190. data/vendor/fastText/website/static/docs/en/html/globals_func.html +0 -155
  191. data/vendor/fastText/website/static/docs/en/html/index.html +0 -100
  192. data/vendor/fastText/website/static/docs/en/html/jquery.js +0 -87
  193. data/vendor/fastText/website/static/docs/en/html/main_8cc.html +0 -582
  194. data/vendor/fastText/website/static/docs/en/html/main_8cc.js +0 -22
  195. data/vendor/fastText/website/static/docs/en/html/matrix_8cc.html +0 -114
  196. data/vendor/fastText/website/static/docs/en/html/matrix_8h.html +0 -121
  197. data/vendor/fastText/website/static/docs/en/html/matrix_8h_source.html +0 -123
  198. data/vendor/fastText/website/static/docs/en/html/menu.js +0 -26
  199. data/vendor/fastText/website/static/docs/en/html/menudata.js +0 -90
  200. data/vendor/fastText/website/static/docs/en/html/model_8cc.html +0 -113
  201. data/vendor/fastText/website/static/docs/en/html/model_8h.html +0 -183
  202. data/vendor/fastText/website/static/docs/en/html/model_8h.js +0 -8
  203. data/vendor/fastText/website/static/docs/en/html/model_8h_source.html +0 -139
  204. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.html +0 -343
  205. data/vendor/fastText/website/static/docs/en/html/namespacefasttext.js +0 -13
  206. data/vendor/fastText/website/static/docs/en/html/namespacefasttext_1_1utils.html +0 -158
  207. data/vendor/fastText/website/static/docs/en/html/namespacemembers.html +0 -125
  208. data/vendor/fastText/website/static/docs/en/html/namespacemembers_enum.html +0 -107
  209. data/vendor/fastText/website/static/docs/en/html/namespacemembers_func.html +0 -110
  210. data/vendor/fastText/website/static/docs/en/html/namespacemembers_type.html +0 -104
  211. data/vendor/fastText/website/static/docs/en/html/namespaces.html +0 -106
  212. data/vendor/fastText/website/static/docs/en/html/namespaces.js +0 -4
  213. data/vendor/fastText/website/static/docs/en/html/nav_f.png +0 -0
  214. data/vendor/fastText/website/static/docs/en/html/nav_g.png +0 -0
  215. data/vendor/fastText/website/static/docs/en/html/nav_h.png +0 -0
  216. data/vendor/fastText/website/static/docs/en/html/navtree.css +0 -146
  217. data/vendor/fastText/website/static/docs/en/html/navtree.js +0 -517
  218. data/vendor/fastText/website/static/docs/en/html/navtreedata.js +0 -40
  219. data/vendor/fastText/website/static/docs/en/html/navtreeindex0.js +0 -253
  220. data/vendor/fastText/website/static/docs/en/html/navtreeindex1.js +0 -139
  221. data/vendor/fastText/website/static/docs/en/html/open.png +0 -0
  222. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.html +0 -118
  223. data/vendor/fastText/website/static/docs/en/html/productquantizer_8cc.js +0 -4
  224. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h.html +0 -124
  225. data/vendor/fastText/website/static/docs/en/html/productquantizer_8h_source.html +0 -133
  226. data/vendor/fastText/website/static/docs/en/html/qmatrix_8cc.html +0 -112
  227. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h.html +0 -126
  228. data/vendor/fastText/website/static/docs/en/html/qmatrix_8h_source.html +0 -128
  229. data/vendor/fastText/website/static/docs/en/html/real_8h.html +0 -117
  230. data/vendor/fastText/website/static/docs/en/html/real_8h.js +0 -4
  231. data/vendor/fastText/website/static/docs/en/html/real_8h_source.html +0 -103
  232. data/vendor/fastText/website/static/docs/en/html/resize.js +0 -114
  233. data/vendor/fastText/website/static/docs/en/html/search/all_0.html +0 -26
  234. data/vendor/fastText/website/static/docs/en/html/search/all_0.js +0 -17
  235. data/vendor/fastText/website/static/docs/en/html/search/all_1.html +0 -26
  236. data/vendor/fastText/website/static/docs/en/html/search/all_1.js +0 -8
  237. data/vendor/fastText/website/static/docs/en/html/search/all_10.html +0 -26
  238. data/vendor/fastText/website/static/docs/en/html/search/all_10.js +0 -10
  239. data/vendor/fastText/website/static/docs/en/html/search/all_11.html +0 -26
  240. data/vendor/fastText/website/static/docs/en/html/search/all_11.js +0 -25
  241. data/vendor/fastText/website/static/docs/en/html/search/all_12.html +0 -26
  242. data/vendor/fastText/website/static/docs/en/html/search/all_12.js +0 -15
  243. data/vendor/fastText/website/static/docs/en/html/search/all_13.html +0 -26
  244. data/vendor/fastText/website/static/docs/en/html/search/all_13.js +0 -7
  245. data/vendor/fastText/website/static/docs/en/html/search/all_14.html +0 -26
  246. data/vendor/fastText/website/static/docs/en/html/search/all_14.js +0 -7
  247. data/vendor/fastText/website/static/docs/en/html/search/all_15.html +0 -26
  248. data/vendor/fastText/website/static/docs/en/html/search/all_15.js +0 -11
  249. data/vendor/fastText/website/static/docs/en/html/search/all_16.html +0 -26
  250. data/vendor/fastText/website/static/docs/en/html/search/all_16.js +0 -4
  251. data/vendor/fastText/website/static/docs/en/html/search/all_17.html +0 -26
  252. data/vendor/fastText/website/static/docs/en/html/search/all_17.js +0 -7
  253. data/vendor/fastText/website/static/docs/en/html/search/all_2.html +0 -26
  254. data/vendor/fastText/website/static/docs/en/html/search/all_2.js +0 -17
  255. data/vendor/fastText/website/static/docs/en/html/search/all_3.html +0 -26
  256. data/vendor/fastText/website/static/docs/en/html/search/all_3.js +0 -17
  257. data/vendor/fastText/website/static/docs/en/html/search/all_4.html +0 -26
  258. data/vendor/fastText/website/static/docs/en/html/search/all_4.js +0 -10
  259. data/vendor/fastText/website/static/docs/en/html/search/all_5.html +0 -26
  260. data/vendor/fastText/website/static/docs/en/html/search/all_5.js +0 -12
  261. data/vendor/fastText/website/static/docs/en/html/search/all_6.html +0 -26
  262. data/vendor/fastText/website/static/docs/en/html/search/all_6.js +0 -18
  263. data/vendor/fastText/website/static/docs/en/html/search/all_7.html +0 -26
  264. data/vendor/fastText/website/static/docs/en/html/search/all_7.js +0 -8
  265. data/vendor/fastText/website/static/docs/en/html/search/all_8.html +0 -26
  266. data/vendor/fastText/website/static/docs/en/html/search/all_8.js +0 -11
  267. data/vendor/fastText/website/static/docs/en/html/search/all_9.html +0 -26
  268. data/vendor/fastText/website/static/docs/en/html/search/all_9.js +0 -5
  269. data/vendor/fastText/website/static/docs/en/html/search/all_a.html +0 -26
  270. data/vendor/fastText/website/static/docs/en/html/search/all_a.js +0 -17
  271. data/vendor/fastText/website/static/docs/en/html/search/all_b.html +0 -26
  272. data/vendor/fastText/website/static/docs/en/html/search/all_b.js +0 -27
  273. data/vendor/fastText/website/static/docs/en/html/search/all_c.html +0 -26
  274. data/vendor/fastText/website/static/docs/en/html/search/all_c.js +0 -26
  275. data/vendor/fastText/website/static/docs/en/html/search/all_d.html +0 -26
  276. data/vendor/fastText/website/static/docs/en/html/search/all_d.js +0 -9
  277. data/vendor/fastText/website/static/docs/en/html/search/all_e.html +0 -26
  278. data/vendor/fastText/website/static/docs/en/html/search/all_e.js +0 -35
  279. data/vendor/fastText/website/static/docs/en/html/search/all_f.html +0 -26
  280. data/vendor/fastText/website/static/docs/en/html/search/all_f.js +0 -16
  281. data/vendor/fastText/website/static/docs/en/html/search/classes_0.html +0 -26
  282. data/vendor/fastText/website/static/docs/en/html/search/classes_0.js +0 -4
  283. data/vendor/fastText/website/static/docs/en/html/search/classes_1.html +0 -26
  284. data/vendor/fastText/website/static/docs/en/html/search/classes_1.js +0 -4
  285. data/vendor/fastText/website/static/docs/en/html/search/classes_2.html +0 -26
  286. data/vendor/fastText/website/static/docs/en/html/search/classes_2.js +0 -4
  287. data/vendor/fastText/website/static/docs/en/html/search/classes_3.html +0 -26
  288. data/vendor/fastText/website/static/docs/en/html/search/classes_3.js +0 -4
  289. data/vendor/fastText/website/static/docs/en/html/search/classes_4.html +0 -26
  290. data/vendor/fastText/website/static/docs/en/html/search/classes_4.js +0 -5
  291. data/vendor/fastText/website/static/docs/en/html/search/classes_5.html +0 -26
  292. data/vendor/fastText/website/static/docs/en/html/search/classes_5.js +0 -4
  293. data/vendor/fastText/website/static/docs/en/html/search/classes_6.html +0 -26
  294. data/vendor/fastText/website/static/docs/en/html/search/classes_6.js +0 -4
  295. data/vendor/fastText/website/static/docs/en/html/search/classes_7.html +0 -26
  296. data/vendor/fastText/website/static/docs/en/html/search/classes_7.js +0 -4
  297. data/vendor/fastText/website/static/docs/en/html/search/classes_8.html +0 -26
  298. data/vendor/fastText/website/static/docs/en/html/search/classes_8.js +0 -4
  299. data/vendor/fastText/website/static/docs/en/html/search/close.png +0 -0
  300. data/vendor/fastText/website/static/docs/en/html/search/defines_0.html +0 -26
  301. data/vendor/fastText/website/static/docs/en/html/search/defines_0.js +0 -5
  302. data/vendor/fastText/website/static/docs/en/html/search/defines_1.html +0 -26
  303. data/vendor/fastText/website/static/docs/en/html/search/defines_1.js +0 -4
  304. data/vendor/fastText/website/static/docs/en/html/search/defines_2.html +0 -26
  305. data/vendor/fastText/website/static/docs/en/html/search/defines_2.js +0 -4
  306. data/vendor/fastText/website/static/docs/en/html/search/defines_3.html +0 -26
  307. data/vendor/fastText/website/static/docs/en/html/search/defines_3.js +0 -4
  308. data/vendor/fastText/website/static/docs/en/html/search/enums_0.html +0 -26
  309. data/vendor/fastText/website/static/docs/en/html/search/enums_0.js +0 -4
  310. data/vendor/fastText/website/static/docs/en/html/search/enums_1.html +0 -26
  311. data/vendor/fastText/website/static/docs/en/html/search/enums_1.js +0 -4
  312. data/vendor/fastText/website/static/docs/en/html/search/enums_2.html +0 -26
  313. data/vendor/fastText/website/static/docs/en/html/search/enums_2.js +0 -4
  314. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.html +0 -26
  315. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_0.js +0 -4
  316. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.html +0 -26
  317. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_1.js +0 -4
  318. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.html +0 -26
  319. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_2.js +0 -4
  320. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.html +0 -26
  321. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_3.js +0 -4
  322. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.html +0 -26
  323. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_4.js +0 -6
  324. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.html +0 -26
  325. data/vendor/fastText/website/static/docs/en/html/search/enumvalues_5.js +0 -4
  326. data/vendor/fastText/website/static/docs/en/html/search/files_0.html +0 -26
  327. data/vendor/fastText/website/static/docs/en/html/search/files_0.js +0 -5
  328. data/vendor/fastText/website/static/docs/en/html/search/files_1.html +0 -26
  329. data/vendor/fastText/website/static/docs/en/html/search/files_1.js +0 -5
  330. data/vendor/fastText/website/static/docs/en/html/search/files_2.html +0 -26
  331. data/vendor/fastText/website/static/docs/en/html/search/files_2.js +0 -5
  332. data/vendor/fastText/website/static/docs/en/html/search/files_3.html +0 -26
  333. data/vendor/fastText/website/static/docs/en/html/search/files_3.js +0 -8
  334. data/vendor/fastText/website/static/docs/en/html/search/files_4.html +0 -26
  335. data/vendor/fastText/website/static/docs/en/html/search/files_4.js +0 -5
  336. data/vendor/fastText/website/static/docs/en/html/search/files_5.html +0 -26
  337. data/vendor/fastText/website/static/docs/en/html/search/files_5.js +0 -5
  338. data/vendor/fastText/website/static/docs/en/html/search/files_6.html +0 -26
  339. data/vendor/fastText/website/static/docs/en/html/search/files_6.js +0 -4
  340. data/vendor/fastText/website/static/docs/en/html/search/files_7.html +0 -26
  341. data/vendor/fastText/website/static/docs/en/html/search/files_7.js +0 -5
  342. data/vendor/fastText/website/static/docs/en/html/search/files_8.html +0 -26
  343. data/vendor/fastText/website/static/docs/en/html/search/files_8.js +0 -5
  344. data/vendor/fastText/website/static/docs/en/html/search/functions_0.html +0 -26
  345. data/vendor/fastText/website/static/docs/en/html/search/functions_0.js +0 -14
  346. data/vendor/fastText/website/static/docs/en/html/search/functions_1.html +0 -26
  347. data/vendor/fastText/website/static/docs/en/html/search/functions_1.js +0 -5
  348. data/vendor/fastText/website/static/docs/en/html/search/functions_10.html +0 -26
  349. data/vendor/fastText/website/static/docs/en/html/search/functions_10.js +0 -5
  350. data/vendor/fastText/website/static/docs/en/html/search/functions_11.html +0 -26
  351. data/vendor/fastText/website/static/docs/en/html/search/functions_11.js +0 -18
  352. data/vendor/fastText/website/static/docs/en/html/search/functions_12.html +0 -26
  353. data/vendor/fastText/website/static/docs/en/html/search/functions_12.js +0 -8
  354. data/vendor/fastText/website/static/docs/en/html/search/functions_13.html +0 -26
  355. data/vendor/fastText/website/static/docs/en/html/search/functions_13.js +0 -5
  356. data/vendor/fastText/website/static/docs/en/html/search/functions_14.html +0 -26
  357. data/vendor/fastText/website/static/docs/en/html/search/functions_14.js +0 -4
  358. data/vendor/fastText/website/static/docs/en/html/search/functions_15.html +0 -26
  359. data/vendor/fastText/website/static/docs/en/html/search/functions_15.js +0 -4
  360. data/vendor/fastText/website/static/docs/en/html/search/functions_16.html +0 -26
  361. data/vendor/fastText/website/static/docs/en/html/search/functions_16.js +0 -4
  362. data/vendor/fastText/website/static/docs/en/html/search/functions_17.html +0 -26
  363. data/vendor/fastText/website/static/docs/en/html/search/functions_17.js +0 -7
  364. data/vendor/fastText/website/static/docs/en/html/search/functions_2.html +0 -26
  365. data/vendor/fastText/website/static/docs/en/html/search/functions_2.js +0 -11
  366. data/vendor/fastText/website/static/docs/en/html/search/functions_3.html +0 -26
  367. data/vendor/fastText/website/static/docs/en/html/search/functions_3.js +0 -9
  368. data/vendor/fastText/website/static/docs/en/html/search/functions_4.html +0 -26
  369. data/vendor/fastText/website/static/docs/en/html/search/functions_4.js +0 -4
  370. data/vendor/fastText/website/static/docs/en/html/search/functions_5.html +0 -26
  371. data/vendor/fastText/website/static/docs/en/html/search/functions_5.js +0 -7
  372. data/vendor/fastText/website/static/docs/en/html/search/functions_6.html +0 -26
  373. data/vendor/fastText/website/static/docs/en/html/search/functions_6.js +0 -17
  374. data/vendor/fastText/website/static/docs/en/html/search/functions_7.html +0 -26
  375. data/vendor/fastText/website/static/docs/en/html/search/functions_7.js +0 -5
  376. data/vendor/fastText/website/static/docs/en/html/search/functions_8.html +0 -26
  377. data/vendor/fastText/website/static/docs/en/html/search/functions_8.js +0 -8
  378. data/vendor/fastText/website/static/docs/en/html/search/functions_9.html +0 -26
  379. data/vendor/fastText/website/static/docs/en/html/search/functions_9.js +0 -4
  380. data/vendor/fastText/website/static/docs/en/html/search/functions_a.html +0 -26
  381. data/vendor/fastText/website/static/docs/en/html/search/functions_a.js +0 -8
  382. data/vendor/fastText/website/static/docs/en/html/search/functions_b.html +0 -26
  383. data/vendor/fastText/website/static/docs/en/html/search/functions_b.js +0 -10
  384. data/vendor/fastText/website/static/docs/en/html/search/functions_c.html +0 -26
  385. data/vendor/fastText/website/static/docs/en/html/search/functions_c.js +0 -10
  386. data/vendor/fastText/website/static/docs/en/html/search/functions_d.html +0 -26
  387. data/vendor/fastText/website/static/docs/en/html/search/functions_d.js +0 -6
  388. data/vendor/fastText/website/static/docs/en/html/search/functions_e.html +0 -26
  389. data/vendor/fastText/website/static/docs/en/html/search/functions_e.js +0 -26
  390. data/vendor/fastText/website/static/docs/en/html/search/functions_f.html +0 -26
  391. data/vendor/fastText/website/static/docs/en/html/search/functions_f.js +0 -6
  392. data/vendor/fastText/website/static/docs/en/html/search/mag_sel.png +0 -0
  393. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.html +0 -26
  394. data/vendor/fastText/website/static/docs/en/html/search/namespaces_0.js +0 -5
  395. data/vendor/fastText/website/static/docs/en/html/search/nomatches.html +0 -12
  396. data/vendor/fastText/website/static/docs/en/html/search/search.css +0 -271
  397. data/vendor/fastText/website/static/docs/en/html/search/search.js +0 -791
  398. data/vendor/fastText/website/static/docs/en/html/search/search_l.png +0 -0
  399. data/vendor/fastText/website/static/docs/en/html/search/search_m.png +0 -0
  400. data/vendor/fastText/website/static/docs/en/html/search/search_r.png +0 -0
  401. data/vendor/fastText/website/static/docs/en/html/search/searchdata.js +0 -42
  402. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.html +0 -26
  403. data/vendor/fastText/website/static/docs/en/html/search/typedefs_0.js +0 -4
  404. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.html +0 -26
  405. data/vendor/fastText/website/static/docs/en/html/search/typedefs_1.js +0 -4
  406. data/vendor/fastText/website/static/docs/en/html/search/variables_0.html +0 -26
  407. data/vendor/fastText/website/static/docs/en/html/search/variables_0.js +0 -4
  408. data/vendor/fastText/website/static/docs/en/html/search/variables_1.html +0 -26
  409. data/vendor/fastText/website/static/docs/en/html/search/variables_1.js +0 -6
  410. data/vendor/fastText/website/static/docs/en/html/search/variables_10.html +0 -26
  411. data/vendor/fastText/website/static/docs/en/html/search/variables_10.js +0 -8
  412. data/vendor/fastText/website/static/docs/en/html/search/variables_11.html +0 -26
  413. data/vendor/fastText/website/static/docs/en/html/search/variables_11.js +0 -11
  414. data/vendor/fastText/website/static/docs/en/html/search/variables_12.html +0 -26
  415. data/vendor/fastText/website/static/docs/en/html/search/variables_12.js +0 -4
  416. data/vendor/fastText/website/static/docs/en/html/search/variables_13.html +0 -26
  417. data/vendor/fastText/website/static/docs/en/html/search/variables_13.js +0 -10
  418. data/vendor/fastText/website/static/docs/en/html/search/variables_2.html +0 -26
  419. data/vendor/fastText/website/static/docs/en/html/search/variables_2.js +0 -9
  420. data/vendor/fastText/website/static/docs/en/html/search/variables_3.html +0 -26
  421. data/vendor/fastText/website/static/docs/en/html/search/variables_3.js +0 -9
  422. data/vendor/fastText/website/static/docs/en/html/search/variables_4.html +0 -26
  423. data/vendor/fastText/website/static/docs/en/html/search/variables_4.js +0 -7
  424. data/vendor/fastText/website/static/docs/en/html/search/variables_5.html +0 -26
  425. data/vendor/fastText/website/static/docs/en/html/search/variables_5.js +0 -4
  426. data/vendor/fastText/website/static/docs/en/html/search/variables_6.html +0 -26
  427. data/vendor/fastText/website/static/docs/en/html/search/variables_6.js +0 -5
  428. data/vendor/fastText/website/static/docs/en/html/search/variables_7.html +0 -26
  429. data/vendor/fastText/website/static/docs/en/html/search/variables_7.js +0 -5
  430. data/vendor/fastText/website/static/docs/en/html/search/variables_8.html +0 -26
  431. data/vendor/fastText/website/static/docs/en/html/search/variables_8.js +0 -4
  432. data/vendor/fastText/website/static/docs/en/html/search/variables_9.html +0 -26
  433. data/vendor/fastText/website/static/docs/en/html/search/variables_9.js +0 -10
  434. data/vendor/fastText/website/static/docs/en/html/search/variables_a.html +0 -26
  435. data/vendor/fastText/website/static/docs/en/html/search/variables_a.js +0 -14
  436. data/vendor/fastText/website/static/docs/en/html/search/variables_b.html +0 -26
  437. data/vendor/fastText/website/static/docs/en/html/search/variables_b.js +0 -17
  438. data/vendor/fastText/website/static/docs/en/html/search/variables_c.html +0 -26
  439. data/vendor/fastText/website/static/docs/en/html/search/variables_c.js +0 -6
  440. data/vendor/fastText/website/static/docs/en/html/search/variables_d.html +0 -26
  441. data/vendor/fastText/website/static/docs/en/html/search/variables_d.js +0 -10
  442. data/vendor/fastText/website/static/docs/en/html/search/variables_e.html +0 -26
  443. data/vendor/fastText/website/static/docs/en/html/search/variables_e.js +0 -11
  444. data/vendor/fastText/website/static/docs/en/html/search/variables_f.html +0 -26
  445. data/vendor/fastText/website/static/docs/en/html/search/variables_f.js +0 -6
  446. data/vendor/fastText/website/static/docs/en/html/splitbar.png +0 -0
  447. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node-members.html +0 -108
  448. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.html +0 -194
  449. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1Node.js +0 -8
  450. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry-members.html +0 -107
  451. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.html +0 -178
  452. data/vendor/fastText/website/static/docs/en/html/structfasttext_1_1entry.js +0 -7
  453. data/vendor/fastText/website/static/docs/en/html/sync_off.png +0 -0
  454. data/vendor/fastText/website/static/docs/en/html/sync_on.png +0 -0
  455. data/vendor/fastText/website/static/docs/en/html/tab_a.png +0 -0
  456. data/vendor/fastText/website/static/docs/en/html/tab_b.png +0 -0
  457. data/vendor/fastText/website/static/docs/en/html/tab_h.png +0 -0
  458. data/vendor/fastText/website/static/docs/en/html/tab_s.png +0 -0
  459. data/vendor/fastText/website/static/docs/en/html/tabs.css +0 -1
  460. data/vendor/fastText/website/static/docs/en/html/utils_8cc.html +0 -121
  461. data/vendor/fastText/website/static/docs/en/html/utils_8cc.js +0 -5
  462. data/vendor/fastText/website/static/docs/en/html/utils_8h.html +0 -122
  463. data/vendor/fastText/website/static/docs/en/html/utils_8h.js +0 -5
  464. data/vendor/fastText/website/static/docs/en/html/utils_8h_source.html +0 -104
  465. data/vendor/fastText/website/static/docs/en/html/vector_8cc.html +0 -121
  466. data/vendor/fastText/website/static/docs/en/html/vector_8cc.js +0 -4
  467. data/vendor/fastText/website/static/docs/en/html/vector_8h.html +0 -126
  468. data/vendor/fastText/website/static/docs/en/html/vector_8h.js +0 -5
  469. data/vendor/fastText/website/static/docs/en/html/vector_8h_source.html +0 -120
  470. data/vendor/fastText/website/static/fasttext.css +0 -48
  471. data/vendor/fastText/website/static/img/authors/armand_joulin.jpg +0 -0
  472. data/vendor/fastText/website/static/img/authors/christian_puhrsch.png +0 -0
  473. data/vendor/fastText/website/static/img/authors/edouard_grave.jpeg +0 -0
  474. data/vendor/fastText/website/static/img/authors/piotr_bojanowski.jpg +0 -0
  475. data/vendor/fastText/website/static/img/authors/tomas_mikolov.jpg +0 -0
  476. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img1.png +0 -0
  477. data/vendor/fastText/website/static/img/blog/2016-08-18-blog-post-img2.png +0 -0
  478. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img1.jpg +0 -0
  479. data/vendor/fastText/website/static/img/blog/2017-05-02-blog-post-img2.jpg +0 -0
  480. data/vendor/fastText/website/static/img/blog/2017-10-02-blog-post-img1.png +0 -0
  481. data/vendor/fastText/website/static/img/cbo_vs_skipgram.png +0 -0
  482. data/vendor/fastText/website/static/img/fasttext-icon-api.png +0 -0
  483. data/vendor/fastText/website/static/img/fasttext-icon-bg-web.png +0 -0
  484. data/vendor/fastText/website/static/img/fasttext-icon-color-square.png +0 -0
  485. data/vendor/fastText/website/static/img/fasttext-icon-color-web.png +0 -0
  486. data/vendor/fastText/website/static/img/fasttext-icon-faq.png +0 -0
  487. data/vendor/fastText/website/static/img/fasttext-icon-tutorial.png +0 -0
  488. data/vendor/fastText/website/static/img/fasttext-icon-white-web.png +0 -0
  489. data/vendor/fastText/website/static/img/fasttext-logo-color-web.png +0 -0
  490. data/vendor/fastText/website/static/img/fasttext-logo-white-web.png +0 -0
  491. data/vendor/fastText/website/static/img/logo-color.png +0 -0
  492. data/vendor/fastText/website/static/img/model-black.png +0 -0
  493. data/vendor/fastText/website/static/img/model-blue.png +0 -0
  494. data/vendor/fastText/website/static/img/model-red.png +0 -0
  495. data/vendor/fastText/website/static/img/ogimage.png +0 -0
  496. data/vendor/fastText/website/static/img/oss_logo.png +0 -0
  497. data/vendor/fastText/wikifil.pl +0 -57
  498. data/vendor/fastText/word-vector-example.sh +0 -39
@@ -1,13 +0,0 @@
1
- # Copyright (c) 2017-present, Facebook, Inc.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the MIT license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- from __future__ import absolute_import
8
- from __future__ import division
9
- from __future__ import print_function
10
- from __future__ import unicode_literals
11
-
12
- from .util import test
13
- from .util import find_nearest_neighbor
@@ -1,60 +0,0 @@
1
- # Copyright (c) 2017-present, Facebook, Inc.
2
- # All rights reserved.
3
- #
4
- # This source code is licensed under the MIT license found in the
5
- # LICENSE file in the root directory of this source tree.
6
-
7
- # NOTE: The purpose of this file is not to accumulate all useful utility
8
- # functions. This file should contain very commonly used and requested functions
9
- # (such as test). If you think you have a function at that level, please create
10
- # an issue and we will happily review your suggestion. This file is also not supposed
11
- # to pull in dependencies outside of numpy/scipy without very good reasons. For
12
- # example, this file should not use sklearn and matplotlib to produce a t-sne
13
- # plot of word embeddings or such.
14
-
15
- from __future__ import absolute_import
16
- from __future__ import division
17
- from __future__ import print_function
18
- from __future__ import unicode_literals
19
-
20
- import numpy as np
21
-
22
-
23
- # TODO: Add example on reproducing model.test with util.test and model.get_line
24
- def test(predictions, labels, k=1):
25
- """
26
- Return precision and recall modeled after fasttext's test
27
- """
28
- precision = 0.0
29
- nexamples = 0
30
- nlabels = 0
31
- for prediction, labels in zip(predictions, labels):
32
- for p in prediction:
33
- if p in labels:
34
- precision += 1
35
- nexamples += 1
36
- nlabels += len(labels)
37
- return (precision / (k * nexamples), precision / nlabels)
38
-
39
-
40
def find_nearest_neighbor(query, vectors, ban_set, cossims=None):
    """
    query is a 1d numpy array corresponding to the vector to which you want to
    find the closest vector
    vectors is a 2d numpy array corresponding to the vectors you want to consider
    ban_set is a set of indices within vectors you want to ignore for nearest match
    cossims is a 1d numpy array of size len(vectors), which can be passed for
    efficiency; when given, it is overwritten with the computed scores

    The score is the plain product vectors @ query.
    NOTE(review): this is a cosine similarity only if the inputs are already
    normalised -- confirm against callers.

    returns the index of the closest match to query within vectors

    raises ValueError when vectors is empty or every index is in ban_set
    """
    if cossims is None:
        cossims = np.matmul(vectors, query)
    else:
        np.matmul(vectors, query, out=cossims)
    # Walk from the best score downwards, one rank at a time.
    rank = len(cossims) - 1
    while rank >= 0:
        result_i = np.argpartition(cossims, rank)[rank]
        if result_i not in ban_set:
            return result_i
        rank -= 1
    # Without this guard a negative rank would wrap around and revisit every
    # candidate before numpy finally rejected the out-of-range kth.
    raise ValueError("no candidate left: every index of vectors is in ban_set")
@@ -1,40 +0,0 @@
1
- myshuf() {
2
- perl -MList::Util=shuffle -e 'print shuffle(<>);' "$@";
3
- }
4
-
5
# Filter stdin -> stdout:
#   1. lowercase everything,
#   2. prefix every line with "__label__",
#   3. put spaces around ' . , ( ) ! ?, delete double quotes, and turn
#      "<br />", ";" and ":" into a space,
#   4. squeeze runs of spaces into one,
#   5. shuffle the lines with myshuf.
normalize_text()
{
  tr '[:upper:]' '[:lower:]' \
    | sed -e 's/^/__label__/g' \
          -e "s/'/ ' /g" \
          -e 's/"//g' \
          -e 's/\./ \. /g' \
          -e 's/<br \/>/ /g' \
          -e 's/,/ , /g' \
          -e 's/(/ ( /g' \
          -e 's/)/ ) /g' \
          -e 's/\!/ \! /g' \
          -e 's/\?/ \? /g' \
          -e 's/\;/ /g' \
          -e 's/\:/ /g' \
    | tr -s " " \
    | myshuf
}
11
-
12
# Stop at the first failing step, including a failure inside a pipeline, so a
# broken download or build is never followed by training on missing data.
# (-u is left out on purpose: older bash versions reject an empty "$@" under it.)
set -eo pipefail

RESULTDIR=result
DATADIR=data

mkdir -p "${RESULTDIR}" "${DATADIR}"

# dbpedia.train is produced last and only put in place by a rename, so its
# presence means both normalized files were written completely. A failed
# attempt therefore no longer blocks the next run from retrying.
if [ ! -f "${DATADIR}/dbpedia.train" ]; then
  wget -c "https://github.com/le-scientifique/torchDatasets/raw/master/dbpedia_csv.tar.gz" -O "${DATADIR}/dbpedia_csv.tar.gz"
  tar -xzvf "${DATADIR}/dbpedia_csv.tar.gz" -C "${DATADIR}"
  normalize_text < "${DATADIR}/dbpedia_csv/test.csv" > "${DATADIR}/dbpedia.test"
  normalize_text < "${DATADIR}/dbpedia_csv/train.csv" > "${DATADIR}/dbpedia.train.tmp"
  mv -- "${DATADIR}/dbpedia.train.tmp" "${DATADIR}/dbpedia.train"
fi

# Build the fasttext binary in the current directory.
make

echo "Training..."
./fasttext supervised -input "${DATADIR}/dbpedia.train" -output "${RESULTDIR}/dbpedia" -dim 10 -lr 0.1 -wordNgrams 2 -minCount 1 -bucket 10000000 -epoch 5 -thread 4

echo "Quantizing..."
./fasttext quantize -output "${RESULTDIR}/dbpedia" -input "${DATADIR}/dbpedia.train" -qnorm -retrain -epoch 1 -cutoff 100000

echo "Testing original model..."
./fasttext test "${RESULTDIR}/dbpedia.bin" "${DATADIR}/dbpedia.test"
echo "Testing quantized model..."
./fasttext test "${RESULTDIR}/dbpedia.ftz" "${DATADIR}/dbpedia.test"

# Report both model sizes in bytes.
wc -c < "${RESULTDIR}/dbpedia.bin" | awk '{print "Size of the original model:\t",$1;}'
wc -c < "${RESULTDIR}/dbpedia.ftz" | awk '{print "Size of the quantized model:\t",$1;}'
@@ -1,60 +0,0 @@
1
- #!/usr/bin/env python
2
- # -*- coding: utf-8 -*-
3
- #
4
- # Copyright (c) 2016-present, Facebook, Inc.
5
- # All rights reserved.
6
- #
7
- # This source code is licensed under the MIT license found in the
8
- # LICENSE file in the root directory of this source tree.
9
- #
10
-
11
- # To run the integration tests you must first fetch all the required test data.
12
- # Have a look at tests/fetch_test_data.sh
13
- # You will then need to point this script to the corresponding folder
14
-
15
- from __future__ import absolute_import
16
- from __future__ import division
17
- from __future__ import print_function
18
- from __future__ import unicode_literals
19
-
20
- import unittest
21
- import argparse
22
- from fasttext.tests import gen_tests
23
- from fasttext.tests import gen_unit_tests
24
-
25
-
26
- def run_tests(tests):
27
- suite = unittest.TestLoader().loadTestsFromTestCase(tests)
28
- unittest.TextTestRunner(verbosity=3).run(suite)
29
-
30
-
31
- if __name__ == "__main__":
32
- parser = argparse.ArgumentParser()
33
- parser.add_argument(
34
- "-u", "--unit-tests", help="run unit tests", action="store_true"
35
- )
36
- parser.add_argument(
37
- "-i",
38
- "--integration-tests",
39
- help="run integration tests",
40
- action="store_true"
41
- )
42
- parser.add_argument(
43
- "-v",
44
- "--verbose",
45
- default=1,
46
- help="verbosity level (default 1)",
47
- type=int,
48
- )
49
- parser.add_argument("--data-dir", help="Full path to data directory")
50
- args = parser.parse_args()
51
- if args.unit_tests:
52
- run_tests(gen_unit_tests(verbose=args.verbose))
53
- if args.integration_tests:
54
- if args.data_dir is None:
55
- raise ValueError(
56
- "Need data directory! Consult tests/fetch_test_data.sh"
57
- )
58
- run_tests(gen_tests(args.data_dir, verbose=args.verbose))
59
- if not args.unit_tests and not args.integration_tests:
60
- print("Ran no tests")
@@ -1,19 +0,0 @@
1
- # Fast Linear Model for Knowledge Graph Embeddings
2
-
3
- ## Knowledge base completion
4
-
5
- These scripts require the [fastText library](https://github.com/facebookresearch/fastText).
6
-
7
- Run the data.sh script to download and format the datasets. Then run any of the scripts to train and test on a given dataset.
8
-
9
- ## Reference
10
-
11
- If you use this code please cite:
12
-
13
- @article{joulin2017fast,
14
- title={Fast Linear Model for Knowledge Graph Embeddings},
15
- author={Joulin, Armand and Grave, Edouard and Bojanowski, Piotr and Nickel, Maximilian and Mikolov, Tomas},
16
- journal={arXiv preprint arXiv:1710.10881},
17
- year={2017}
18
- }
19
-
@@ -1,69 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # Copyright (c) 2017-present, Facebook, Inc.
4
- # All rights reserved.
5
- #
6
- # This source code is licensed under the MIT license found in the
7
- # LICENSE file in the root directory of this source tree.
8
- #
9
set -e
DATADIR=data/

# -p replaces the explicit existence check and also creates missing parents.
mkdir -p "${DATADIR}"
cd "${DATADIR}"

# NOTE: the ft_* globs used for the combined files below also match files this
# script writes itself (ft_*-full.txt, ft_*valid+train.txt), so run it against
# a fresh data directory rather than re-running it in place.

# Every converted file holds two lines per input triple: one with the first
# field as the label and one with the third field as the label. The second
# field is prefixed with 0_ or 1_ to tell the two directions apart.

echo "preparing WN18"
#wget -P . https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:wordnet-mlj12.tar.gz
#mv fetch.php\?media\=en\:wordnet-mlj12.tar.gz wordnet-mlj12.tar.gz
wget -P . "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/wordnet-mlj12.tar.gz"
tar -xzvf wordnet-mlj12.tar.gz
DIR=wordnet-mlj12
for f in "${DIR}"/wordnet-ml*.txt; do
  fn="${DIR}/ft_$(basename "${f}")"
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_* > "${DIR}/ft_wordnet-mlj12-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_wordnet-mlj12-valid+train.txt"

echo "preparing FB15K"
#wget https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:fb15k.tgz
#mv fetch.php\?media\=en\:fb15k.tgz fb15k.tgz
wget "https://github.com/mana-ysh/knowledge-graph-embeddings/raw/master/dat/fb15k.tgz"
tar -xzvf fb15k.tgz
DIR=FB15k
for f in "${DIR}"/freebase*.txt; do
  fn="${DIR}/ft_$(basename "${f}")"
  echo "${f}" " --> " "${fn}"
  awk '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_* > "${DIR}/ft_freebase_mtr100_mte100-full.txt"
cat "${DIR}"/ft_*train.txt "${DIR}"/ft_*valid.txt > "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt"

echo "preparing FB15K-237"
wget "https://download.microsoft.com/download/8/7/0/8700516A-AB3D-4850-B4BB-805C515AECE1/FB15K-237.2.zip"
unzip FB15K-237.2.zip
DIR=Release
for f in train.txt test.txt valid.txt; do
  fn="${DIR}/ft_$(basename "${f}")"
  echo "${f}" " --> " "${fn}"
  # These files are split on tabs only (-F "\t").
  awk -F "\t" '{print "__label__"$1,"0_"$2, $3;print $1,"1_"$2," __label__"$3}' < "${DIR}/${f}" > "${fn}"
done
cat "${DIR}"/ft_*.txt > "${DIR}/ft_full.txt"
cat "${DIR}/ft_train.txt" "${DIR}/ft_valid.txt" > "${DIR}/ft_valid+train.txt"

echo "preparing SVO"
# Save straight to the final name with -O. Passing "." as an extra argument
# makes wget treat it as a URL and exit non-zero, which aborts the script under
# set -e; the unquoted "?" in the URL and file name is also a glob character.
wget -O svo-tensor-dataset.tar.gz "https://everest.hds.utc.fr/lib/exe/fetch.php?media=en:svo-tensor-dataset.tar.gz"
tar -xzvf svo-tensor-dataset.tar.gz
DIR=SVO-tensor-dataset
for f in "${DIR}"/svo_data*.dat; do
  fn="${DIR}/ft_$(basename "${f}")"
  # Fields 1 and 3 become the 0_/1_ features, field 2 becomes the label.
  awk '{print "0_"$1,"1_"$3,"__label__"$2;}' < "${f}" > "${fn}"
done
cat "${DIR}"/ft_*train*.dat "${DIR}"/ft_*valid*.dat > "${DIR}/ft_svo_data-valid+train.dat"
@@ -1,108 +0,0 @@
1
- /**
2
- * Copyright (c) 2017-present, Facebook, Inc.
3
- * All rights reserved.
4
- *
5
- * This source code is licensed under the MIT license found in the
6
- * LICENSE file in the root directory of this source tree.
7
- */
8
-
9
- #include <unordered_map>
10
- #include <iostream>
11
- #include <fstream>
12
- #include <string>
13
- #include <vector>
14
-
15
- std::string EOS = "</s>";
16
-
17
- bool readWord(std::istream& in, std::string& word)
18
- {
19
- char c;
20
- std::streambuf& sb = *in.rdbuf();
21
- word.clear();
22
- while ((c = sb.sbumpc()) != EOF) {
23
- if (c == ' ' || c == '\n' || c == '\r' || c == '\t' || c == '\v' ||
24
- c == '\f' || c == '\0') {
25
- if (word.empty()) {
26
- if (c == '\n') {
27
- word += EOS;
28
- return true;
29
- }
30
- continue;
31
- } else {
32
- if (c == '\n')
33
- sb.sungetc();
34
- return true;
35
- }
36
- }
37
- word.push_back(c);
38
- }
39
- in.get();
40
- return !word.empty();
41
- }
42
-
43
- int main(int argc, char** argv) {
44
- int k = 10;
45
- if (argc < 4) {
46
- std::cerr<<"eval <pred> <gt> <kb> [<k>]"<<std::endl;
47
- exit(1);
48
- }
49
- if (argc == 5) { k = atoi(argv[4]);}
50
-
51
- std::string predfn(argv[1]);
52
- std::ifstream predf(predfn);
53
- std::string gtfn(argv[2]);
54
- std::ifstream gtf(gtfn);
55
- std::string kbfn(argv[3]);
56
- std::ifstream kbf(kbfn);
57
-
58
- if (!predf.is_open() || !gtf.is_open() || !kbf.is_open()) {
59
- std::cerr << "Files cannot be opened!" << std::endl;
60
- exit(EXIT_FAILURE);
61
- }
62
-
63
- std::unordered_map< std::string,
64
- std::unordered_map< std::string, bool > > KB;
65
-
66
- while (kbf.peek() != EOF) {
67
- std::string label, key, word;
68
- while (readWord(kbf, word)) {
69
- if (word == EOS) {break;}
70
- if (word.find("__label__") == 0) {label = word;}
71
- else {key += "|" + word;}
72
- }
73
- KB[key][label] = true;
74
- }
75
- kbf.close();
76
-
77
- double precision = 0.0;
78
- int32_t nexamples = 0;
79
- while (predf.peek() != EOF || gtf.peek() != EOF) {
80
- if (predf.peek() == EOF || gtf.peek() == EOF) {
81
- std::cerr<<"pred / gt files have diff sizes"<<std::endl;
82
- exit(1);
83
- }
84
- std::string label, key, word;
85
-
86
- while (readWord(gtf, word)) {
87
- if (word == EOS) {break;}
88
- if ( word.find("__label__") == 0) {label = word;}
89
- else {key += "|" + word;}
90
- }
91
- if (KB.find(key) == KB.end()) {
92
- std::cerr<<"empty key!"<<std::endl; exit(1);
93
- }
94
-
95
- int count = 0;bool eval = true;
96
- while (readWord(predf, word)) {
97
- if (word == EOS) {break;}
98
- if (!eval) {continue;}
99
- if (label == word) {precision += 1.0; eval = false;}
100
- else if (KB[key].find(word) == KB[key].end()) {count++;}
101
- if (count == k) {eval = false;}
102
- }
103
- nexamples++;
104
- }
105
- predf.close(); gtf.close();
106
- std::cout << "N:\t" << nexamples << std::endl;
107
- std::cout << "R@" << k << "\t" << precision / nexamples << std::endl;
108
- }
@@ -1,49 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for FB15k
10
# Abort on the first failing step (build, training, prediction) instead of
# evaluating with a stale or missing binary/model.
set -eo pipefail

DIR=data/FB15k
FASTTEXTDIR=../..

# compile
pushd "${FASTTEXTDIR}"
make opt
popd
ft="${FASTTEXTDIR}/fasttext"

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:
dim=100
epoch=100
neg=100
model=data/fb15
pred=data/fbpred

# Train a supervised model on the file given as $1 and write it to ${model}.
train_model() {
  "${ft}" supervised -input "$1" \
    -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0
}

# Print raw and filtered hit@10 of ${model}.bin on the test split.
report_hits() {
  echo "computing raw hits@10..."
  # The value is taken from the third line of the `fasttext test` output.
  "${ft}" test "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 10 2> /dev/null | awk '{if(NR==3) print "raw hit@10="$2}'

  echo "computing filtered hit@10..."
  "${ft}" predict "${model}.bin" "${DIR}/ft_freebase_mtr100_mte100-test.txt" 20000 > "${pred}"
  # The value is taken from the second line of the eval output.
  ./eval "${pred}" "${DIR}/ft_freebase_mtr100_mte100-test.txt" "${DIR}/ft_freebase_mtr100_mte100-full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
}

echo "---- train ----"
train_model "${DIR}/ft_freebase_mtr100_mte100-train.txt"
report_hits

echo "---- train+val ----"

# The second run previously passed ${dim} as the epoch count; it now uses
# ${epoch} like the first run (both are 100 here, so results are unchanged).
train_model "${DIR}/ft_freebase_mtr100_mte100-valid+train.txt"
report_hits
@@ -1,45 +0,0 @@
1
- #!/usr/bin/env bash
2
- #
3
- # copyright (c) 2017-present, facebook, inc.
4
- # all rights reserved.
5
- #
6
- # this source code is licensed under the MIT license found in the
7
- # license file in the root directory of this source tree.
8
- #
9
- # script for FB15k237
10
# Abort on the first failing step (build, training, prediction) instead of
# evaluating with a stale or missing binary/model.
set -eo pipefail

DIR=data/Release
FASTTEXTDIR=../..

# compile

pushd "${FASTTEXTDIR}"
make opt
popd
ft="${FASTTEXTDIR}/fasttext"

g++ -std=c++0x eval.cpp -o eval

## Train model and test it on validation:

pred=data/fb237pred
model=data/fb15k237
dim=50
epoch=10
neg=500

# Train a supervised model on the file given as $1 and write it to ${model}.
train_model() {
  "${ft}" supervised -input "$1" \
    -dim "${dim}" -epoch "${epoch}" -output "${model}" -lr .2 -thread 20 -loss ns -neg "${neg}" -minCount 0
}

# Print the filtered hit@10 of ${model}.bin on the test split.
report_filtered_hits() {
  echo "computing filtered hit@10..."
  "${ft}" predict "${model}.bin" "${DIR}/ft_test.txt" 20000 > "${pred}"
  # The value is taken from the second line of the eval output.
  ./eval "${pred}" "${DIR}/ft_test.txt" "${DIR}/ft_full.txt" 10 | awk '{if(NR==2) print "filtered hit@10="$2}'
}

echo "---- train ----"
train_model "${DIR}/ft_train.txt"
report_filtered_hits

echo "---- train+val ----"

# NOTE(review): the second run previously passed ${dim} (50) as the epoch
# count, which looks like a slip for ${epoch} (10). It now uses ${epoch};
# restore 50 if the longer train+val run was intended.
train_model "${DIR}/ft_valid+train.txt"
report_filtered_hits