simplificator-babel 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (377) hide show
  1. data/.document +5 -0
  2. data/.gitignore +6 -0
  3. data/README.markdown +50 -7
  4. data/Rakefile +46 -0
  5. data/VERSION.yml +2 -2
  6. data/babel.gemspec +64 -0
  7. data/lib/babel/babel.rb +84 -23
  8. data/lib/babel/profile.rb +43 -25
  9. data/lib/babel/string_extensions.rb +22 -17
  10. data/lib/profiles/{profile_deu_1996.yml → profile_deu.yml} +2 -1
  11. data/lib/profiles/profile_eng.yml +2 -1
  12. data/lib/profiles/profile_fra.yml +2 -1
  13. data/lib/profiles/profile_ita.yml +22432 -0
  14. data/lib/profiles/profile_spa.yml +2 -1
  15. data/lib/profiles/udhr_txt.zip +0 -0
  16. data/samples/guessing.rb +28 -0
  17. data/test/babel_test.rb +3 -34
  18. data/test/string_extensions_test.rb +10 -8
  19. metadata +8 -361
  20. data/lib/data/udhr_txt/index.xml +0 -385
  21. data/lib/data/udhr_txt/udhr_007.txt +0 -220
  22. data/lib/data/udhr_txt/udhr_008.txt +0 -220
  23. data/lib/data/udhr_txt/udhr_009.txt +0 -228
  24. data/lib/data/udhr_txt/udhr_010.txt +0 -219
  25. data/lib/data/udhr_txt/udhr_011.txt +0 -232
  26. data/lib/data/udhr_txt/udhr_abk.txt +0 -218
  27. data/lib/data/udhr_txt/udhr_ace.txt +0 -221
  28. data/lib/data/udhr_txt/udhr_acu.txt +0 -222
  29. data/lib/data/udhr_txt/udhr_ada.txt +0 -220
  30. data/lib/data/udhr_txt/udhr_afr.txt +0 -219
  31. data/lib/data/udhr_txt/udhr_agr.txt +0 -219
  32. data/lib/data/udhr_txt/udhr_aii.txt +0 -216
  33. data/lib/data/udhr_txt/udhr_ajg.txt +0 -219
  34. data/lib/data/udhr_txt/udhr_aka_akuapem.txt +0 -221
  35. data/lib/data/udhr_txt/udhr_aka_asante.txt +0 -220
  36. data/lib/data/udhr_txt/udhr_aka_fante.txt +0 -219
  37. data/lib/data/udhr_txt/udhr_als.txt +0 -220
  38. data/lib/data/udhr_txt/udhr_amc.txt +0 -215
  39. data/lib/data/udhr_txt/udhr_ame.txt +0 -222
  40. data/lib/data/udhr_txt/udhr_amh.txt +0 -209
  41. data/lib/data/udhr_txt/udhr_amr.txt +0 -221
  42. data/lib/data/udhr_txt/udhr_arb.txt +0 -220
  43. data/lib/data/udhr_txt/udhr_arl.txt +0 -222
  44. data/lib/data/udhr_txt/udhr_arn.txt +0 -218
  45. data/lib/data/udhr_txt/udhr_ast.txt +0 -221
  46. data/lib/data/udhr_txt/udhr_auv.txt +0 -217
  47. data/lib/data/udhr_txt/udhr_ayr.txt +0 -218
  48. data/lib/data/udhr_txt/udhr_azj_cyrl.txt +0 -218
  49. data/lib/data/udhr_txt/udhr_azj_latn.txt +0 -218
  50. data/lib/data/udhr_txt/udhr_bam.txt +0 -218
  51. data/lib/data/udhr_txt/udhr_ban.txt +0 -222
  52. data/lib/data/udhr_txt/udhr_bba.txt +0 -218
  53. data/lib/data/udhr_txt/udhr_bci.txt +0 -217
  54. data/lib/data/udhr_txt/udhr_bcl.txt +0 -219
  55. data/lib/data/udhr_txt/udhr_bel.txt +0 -221
  56. data/lib/data/udhr_txt/udhr_bem.txt +0 -217
  57. data/lib/data/udhr_txt/udhr_ben.txt +0 -222
  58. data/lib/data/udhr_txt/udhr_bho.txt +0 -219
  59. data/lib/data/udhr_txt/udhr_bin.txt +0 -232
  60. data/lib/data/udhr_txt/udhr_bis.txt +0 -218
  61. data/lib/data/udhr_txt/udhr_blu.txt +0 -219
  62. data/lib/data/udhr_txt/udhr_boa.txt +0 -223
  63. data/lib/data/udhr_txt/udhr_bod.txt +0 -221
  64. data/lib/data/udhr_txt/udhr_bos_cyrl.txt +0 -220
  65. data/lib/data/udhr_txt/udhr_bos_latn.txt +0 -220
  66. data/lib/data/udhr_txt/udhr_bre.txt +0 -222
  67. data/lib/data/udhr_txt/udhr_btb.txt +0 -217
  68. data/lib/data/udhr_txt/udhr_bug.txt +0 -222
  69. data/lib/data/udhr_txt/udhr_bul.txt +0 -218
  70. data/lib/data/udhr_txt/udhr_cab.txt +0 -221
  71. data/lib/data/udhr_txt/udhr_cak.txt +0 -217
  72. data/lib/data/udhr_txt/udhr_cat.txt +0 -220
  73. data/lib/data/udhr_txt/udhr_cbr.txt +0 -219
  74. data/lib/data/udhr_txt/udhr_cbs.txt +0 -153
  75. data/lib/data/udhr_txt/udhr_cbt.txt +0 -220
  76. data/lib/data/udhr_txt/udhr_cbu.txt +0 -218
  77. data/lib/data/udhr_txt/udhr_ccx.txt +0 -222
  78. data/lib/data/udhr_txt/udhr_ceb.txt +0 -218
  79. data/lib/data/udhr_txt/udhr_ces.txt +0 -221
  80. data/lib/data/udhr_txt/udhr_cha.txt +0 -219
  81. data/lib/data/udhr_txt/udhr_chj.txt +0 -220
  82. data/lib/data/udhr_txt/udhr_chk.txt +0 -220
  83. data/lib/data/udhr_txt/udhr_chr.txt +0 -10
  84. data/lib/data/udhr_txt/udhr_cic.txt +0 -220
  85. data/lib/data/udhr_txt/udhr_cjk.txt +0 -218
  86. data/lib/data/udhr_txt/udhr_cjk_AO.txt +0 -220
  87. data/lib/data/udhr_txt/udhr_ckb.txt +0 -217
  88. data/lib/data/udhr_txt/udhr_cmn_hans.txt +0 -220
  89. data/lib/data/udhr_txt/udhr_cmn_hant.txt +0 -220
  90. data/lib/data/udhr_txt/udhr_cnh.txt +0 -220
  91. data/lib/data/udhr_txt/udhr_cni.txt +0 -220
  92. data/lib/data/udhr_txt/udhr_cos.txt +0 -218
  93. data/lib/data/udhr_txt/udhr_cot.txt +0 -222
  94. data/lib/data/udhr_txt/udhr_cpu.txt +0 -219
  95. data/lib/data/udhr_txt/udhr_crs.txt +0 -217
  96. data/lib/data/udhr_txt/udhr_csa.txt +0 -223
  97. data/lib/data/udhr_txt/udhr_csw.txt +0 -163
  98. data/lib/data/udhr_txt/udhr_ctd.txt +0 -222
  99. data/lib/data/udhr_txt/udhr_cym.txt +0 -222
  100. data/lib/data/udhr_txt/udhr_dag.txt +0 -217
  101. data/lib/data/udhr_txt/udhr_dan.txt +0 -224
  102. data/lib/data/udhr_txt/udhr_ddn.txt +0 -217
  103. data/lib/data/udhr_txt/udhr_deu_1901.txt +0 -220
  104. data/lib/data/udhr_txt/udhr_deu_1996.txt +0 -220
  105. data/lib/data/udhr_txt/udhr_dga.txt +0 -220
  106. data/lib/data/udhr_txt/udhr_dip.txt +0 -217
  107. data/lib/data/udhr_txt/udhr_div.txt +0 -220
  108. data/lib/data/udhr_txt/udhr_dyo.txt +0 -217
  109. data/lib/data/udhr_txt/udhr_dzo.txt +0 -9
  110. data/lib/data/udhr_txt/udhr_ell_monotonic.txt +0 -220
  111. data/lib/data/udhr_txt/udhr_ell_polytonic.txt +0 -220
  112. data/lib/data/udhr_txt/udhr_emk.txt +0 -218
  113. data/lib/data/udhr_txt/udhr_eml.txt +0 -219
  114. data/lib/data/udhr_txt/udhr_eng.txt +0 -219
  115. data/lib/data/udhr_txt/udhr_epo.txt +0 -221
  116. data/lib/data/udhr_txt/udhr_est.txt +0 -219
  117. data/lib/data/udhr_txt/udhr_eus.txt +0 -220
  118. data/lib/data/udhr_txt/udhr_eve.txt +0 -207
  119. data/lib/data/udhr_txt/udhr_ewe.txt +0 -218
  120. data/lib/data/udhr_txt/udhr_fao.txt +0 -219
  121. data/lib/data/udhr_txt/udhr_fij.txt +0 -224
  122. data/lib/data/udhr_txt/udhr_fin.txt +0 -224
  123. data/lib/data/udhr_txt/udhr_flm.txt +0 -219
  124. data/lib/data/udhr_txt/udhr_fon.txt +0 -217
  125. data/lib/data/udhr_txt/udhr_fra.txt +0 -218
  126. data/lib/data/udhr_txt/udhr_fri.txt +0 -219
  127. data/lib/data/udhr_txt/udhr_fuc.txt +0 -217
  128. data/lib/data/udhr_txt/udhr_fur.txt +0 -220
  129. data/lib/data/udhr_txt/udhr_gaa.txt +0 -220
  130. data/lib/data/udhr_txt/udhr_gag.txt +0 -223
  131. data/lib/data/udhr_txt/udhr_gax.txt +0 -222
  132. data/lib/data/udhr_txt/udhr_gjn.txt +0 -220
  133. data/lib/data/udhr_txt/udhr_gkp.txt +0 -216
  134. data/lib/data/udhr_txt/udhr_gla.txt +0 -229
  135. data/lib/data/udhr_txt/udhr_gle.txt +0 -215
  136. data/lib/data/udhr_txt/udhr_glg.txt +0 -217
  137. data/lib/data/udhr_txt/udhr_guc.txt +0 -221
  138. data/lib/data/udhr_txt/udhr_gug.txt +0 -210
  139. data/lib/data/udhr_txt/udhr_guj.txt +0 -219
  140. data/lib/data/udhr_txt/udhr_gyr.txt +0 -203
  141. data/lib/data/udhr_txt/udhr_hat_kreyol.txt +0 -221
  142. data/lib/data/udhr_txt/udhr_hat_popular.txt +0 -221
  143. data/lib/data/udhr_txt/udhr_hau_NE.txt +0 -219
  144. data/lib/data/udhr_txt/udhr_hau_NG.txt +0 -219
  145. data/lib/data/udhr_txt/udhr_haw.txt +0 -219
  146. data/lib/data/udhr_txt/udhr_hea.txt +0 -219
  147. data/lib/data/udhr_txt/udhr_heb.txt +0 -216
  148. data/lib/data/udhr_txt/udhr_hil.txt +0 -217
  149. data/lib/data/udhr_txt/udhr_hin.txt +0 -222
  150. data/lib/data/udhr_txt/udhr_hms.txt +0 -219
  151. data/lib/data/udhr_txt/udhr_hna.txt +0 -217
  152. data/lib/data/udhr_txt/udhr_hni.txt +0 -218
  153. data/lib/data/udhr_txt/udhr_hrv.txt +0 -218
  154. data/lib/data/udhr_txt/udhr_hsb.txt +0 -220
  155. data/lib/data/udhr_txt/udhr_hun.txt +0 -218
  156. data/lib/data/udhr_txt/udhr_hus.txt +0 -222
  157. data/lib/data/udhr_txt/udhr_huu.txt +0 -220
  158. data/lib/data/udhr_txt/udhr_hva.txt +0 -220
  159. data/lib/data/udhr_txt/udhr_hye.txt +0 -234
  160. data/lib/data/udhr_txt/udhr_ibb.txt +0 -235
  161. data/lib/data/udhr_txt/udhr_ibo.txt +0 -219
  162. data/lib/data/udhr_txt/udhr_ido.txt +0 -224
  163. data/lib/data/udhr_txt/udhr_iii.txt +0 -9
  164. data/lib/data/udhr_txt/udhr_ike.txt +0 -163
  165. data/lib/data/udhr_txt/udhr_ilo.txt +0 -217
  166. data/lib/data/udhr_txt/udhr_ina.txt +0 -220
  167. data/lib/data/udhr_txt/udhr_ind.txt +0 -219
  168. data/lib/data/udhr_txt/udhr_isl.txt +0 -217
  169. data/lib/data/udhr_txt/udhr_ita.txt +0 -221
  170. data/lib/data/udhr_txt/udhr_jav.txt +0 -222
  171. data/lib/data/udhr_txt/udhr_jpn.txt +0 -219
  172. data/lib/data/udhr_txt/udhr_kal.txt +0 -218
  173. data/lib/data/udhr_txt/udhr_kan.txt +0 -216
  174. data/lib/data/udhr_txt/udhr_kat.txt +0 -221
  175. data/lib/data/udhr_txt/udhr_kaz.txt +0 -218
  176. data/lib/data/udhr_txt/udhr_kbp.txt +0 -218
  177. data/lib/data/udhr_txt/udhr_kde.txt +0 -212
  178. data/lib/data/udhr_txt/udhr_kea.txt +0 -219
  179. data/lib/data/udhr_txt/udhr_kek.txt +0 -219
  180. data/lib/data/udhr_txt/udhr_khk.txt +0 -217
  181. data/lib/data/udhr_txt/udhr_khk_mong.txt +0 -11
  182. data/lib/data/udhr_txt/udhr_khm.txt +0 -220
  183. data/lib/data/udhr_txt/udhr_kin.txt +0 -220
  184. data/lib/data/udhr_txt/udhr_kir.txt +0 -220
  185. data/lib/data/udhr_txt/udhr_kmb.txt +0 -219
  186. data/lib/data/udhr_txt/udhr_knc.txt +0 -230
  187. data/lib/data/udhr_txt/udhr_kng.txt +0 -219
  188. data/lib/data/udhr_txt/udhr_kng_AO.txt +0 -219
  189. data/lib/data/udhr_txt/udhr_koo.txt +0 -216
  190. data/lib/data/udhr_txt/udhr_kor.txt +0 -219
  191. data/lib/data/udhr_txt/udhr_kqn.txt +0 -218
  192. data/lib/data/udhr_txt/udhr_kri.txt +0 -226
  193. data/lib/data/udhr_txt/udhr_ktu.txt +0 -219
  194. data/lib/data/udhr_txt/udhr_lao.txt +0 -223
  195. data/lib/data/udhr_txt/udhr_lat.txt +0 -221
  196. data/lib/data/udhr_txt/udhr_lat_1.txt +0 -220
  197. data/lib/data/udhr_txt/udhr_lav.txt +0 -220
  198. data/lib/data/udhr_txt/udhr_lia.txt +0 -218
  199. data/lib/data/udhr_txt/udhr_lin.txt +0 -217
  200. data/lib/data/udhr_txt/udhr_lin_tones.txt +0 -214
  201. data/lib/data/udhr_txt/udhr_lit.txt +0 -218
  202. data/lib/data/udhr_txt/udhr_lnc.txt +0 -219
  203. data/lib/data/udhr_txt/udhr_lns.txt +0 -219
  204. data/lib/data/udhr_txt/udhr_loz.txt +0 -219
  205. data/lib/data/udhr_txt/udhr_ltz.txt +0 -218
  206. data/lib/data/udhr_txt/udhr_lua.txt +0 -219
  207. data/lib/data/udhr_txt/udhr_lue.txt +0 -217
  208. data/lib/data/udhr_txt/udhr_lug.txt +0 -216
  209. data/lib/data/udhr_txt/udhr_lun.txt +0 -216
  210. data/lib/data/udhr_txt/udhr_mad.txt +0 -223
  211. data/lib/data/udhr_txt/udhr_mag.txt +0 -220
  212. data/lib/data/udhr_txt/udhr_mah.txt +0 -220
  213. data/lib/data/udhr_txt/udhr_mai.txt +0 -223
  214. data/lib/data/udhr_txt/udhr_mal.txt +0 -210
  215. data/lib/data/udhr_txt/udhr_mam.txt +0 -218
  216. data/lib/data/udhr_txt/udhr_mar.txt +0 -219
  217. data/lib/data/udhr_txt/udhr_maz.txt +0 -218
  218. data/lib/data/udhr_txt/udhr_mcd.txt +0 -220
  219. data/lib/data/udhr_txt/udhr_mcf.txt +0 -223
  220. data/lib/data/udhr_txt/udhr_men.txt +0 -222
  221. data/lib/data/udhr_txt/udhr_mic.txt +0 -218
  222. data/lib/data/udhr_txt/udhr_min.txt +0 -221
  223. data/lib/data/udhr_txt/udhr_miq.txt +0 -213
  224. data/lib/data/udhr_txt/udhr_mkd.txt +0 -221
  225. data/lib/data/udhr_txt/udhr_mlt.txt +0 -217
  226. data/lib/data/udhr_txt/udhr_mly_arab.txt +0 -219
  227. data/lib/data/udhr_txt/udhr_mly_latn.txt +0 -218
  228. data/lib/data/udhr_txt/udhr_mos.txt +0 -216
  229. data/lib/data/udhr_txt/udhr_mri.txt +0 -219
  230. data/lib/data/udhr_txt/udhr_mxi.txt +0 -218
  231. data/lib/data/udhr_txt/udhr_mxv.txt +0 -223
  232. data/lib/data/udhr_txt/udhr_mya.txt +0 -219
  233. data/lib/data/udhr_txt/udhr_mzi.txt +0 -227
  234. data/lib/data/udhr_txt/udhr_nav.txt +0 -219
  235. data/lib/data/udhr_txt/udhr_nba.txt +0 -257
  236. data/lib/data/udhr_txt/udhr_nbl.txt +0 -218
  237. data/lib/data/udhr_txt/udhr_ndo.txt +0 -217
  238. data/lib/data/udhr_txt/udhr_nep.txt +0 -214
  239. data/lib/data/udhr_txt/udhr_nhn.txt +0 -221
  240. data/lib/data/udhr_txt/udhr_nld.txt +0 -217
  241. data/lib/data/udhr_txt/udhr_nno.txt +0 -219
  242. data/lib/data/udhr_txt/udhr_nob.txt +0 -225
  243. data/lib/data/udhr_txt/udhr_not.txt +0 -218
  244. data/lib/data/udhr_txt/udhr_nso.txt +0 -219
  245. data/lib/data/udhr_txt/udhr_nya_chechewa.txt +0 -221
  246. data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +0 -218
  247. data/lib/data/udhr_txt/udhr_nym.txt +0 -229
  248. data/lib/data/udhr_txt/udhr_nyn.txt +0 -213
  249. data/lib/data/udhr_txt/udhr_nzi.txt +0 -221
  250. data/lib/data/udhr_txt/udhr_ojb.txt +0 -221
  251. data/lib/data/udhr_txt/udhr_oss.txt +0 -214
  252. data/lib/data/udhr_txt/udhr_ote.txt +0 -218
  253. data/lib/data/udhr_txt/udhr_pam.txt +0 -225
  254. data/lib/data/udhr_txt/udhr_pan.txt +0 -227
  255. data/lib/data/udhr_txt/udhr_pau.txt +0 -219
  256. data/lib/data/udhr_txt/udhr_pbb.txt +0 -218
  257. data/lib/data/udhr_txt/udhr_pbu.txt +0 -9
  258. data/lib/data/udhr_txt/udhr_pcd.txt +0 -218
  259. data/lib/data/udhr_txt/udhr_pcm.txt +0 -218
  260. data/lib/data/udhr_txt/udhr_pes_1.txt +0 -218
  261. data/lib/data/udhr_txt/udhr_pes_2.txt +0 -222
  262. data/lib/data/udhr_txt/udhr_pis.txt +0 -219
  263. data/lib/data/udhr_txt/udhr_plt.txt +0 -214
  264. data/lib/data/udhr_txt/udhr_pnb.txt +0 -223
  265. data/lib/data/udhr_txt/udhr_pol.txt +0 -220
  266. data/lib/data/udhr_txt/udhr_pon.txt +0 -218
  267. data/lib/data/udhr_txt/udhr_por_BR.txt +0 -231
  268. data/lib/data/udhr_txt/udhr_por_PT.txt +0 -219
  269. data/lib/data/udhr_txt/udhr_pov.txt +0 -220
  270. data/lib/data/udhr_txt/udhr_ppl.txt +0 -219
  271. data/lib/data/udhr_txt/udhr_prq.txt +0 -151
  272. data/lib/data/udhr_txt/udhr_prv.txt +0 -207
  273. data/lib/data/udhr_txt/udhr_quc.txt +0 -217
  274. data/lib/data/udhr_txt/udhr_qud.txt +0 -218
  275. data/lib/data/udhr_txt/udhr_quy.txt +0 -221
  276. data/lib/data/udhr_txt/udhr_quz.txt +0 -223
  277. data/lib/data/udhr_txt/udhr_qva.txt +0 -219
  278. data/lib/data/udhr_txt/udhr_qvc.txt +0 -218
  279. data/lib/data/udhr_txt/udhr_qvh.txt +0 -217
  280. data/lib/data/udhr_txt/udhr_qvm.txt +0 -219
  281. data/lib/data/udhr_txt/udhr_qvn.txt +0 -217
  282. data/lib/data/udhr_txt/udhr_qwh.txt +0 -218
  283. data/lib/data/udhr_txt/udhr_qxa.txt +0 -217
  284. data/lib/data/udhr_txt/udhr_qxn.txt +0 -216
  285. data/lib/data/udhr_txt/udhr_qxu.txt +0 -221
  286. data/lib/data/udhr_txt/udhr_rar.txt +0 -220
  287. data/lib/data/udhr_txt/udhr_rmn.txt +0 -220
  288. data/lib/data/udhr_txt/udhr_rmn_1.txt +0 -221
  289. data/lib/data/udhr_txt/udhr_rmy.txt +0 -218
  290. data/lib/data/udhr_txt/udhr_roh.txt +0 -217
  291. data/lib/data/udhr_txt/udhr_ron_1953.txt +0 -218
  292. data/lib/data/udhr_txt/udhr_ron_1993.txt +0 -218
  293. data/lib/data/udhr_txt/udhr_ron_2006.txt +0 -218
  294. data/lib/data/udhr_txt/udhr_run.txt +0 -218
  295. data/lib/data/udhr_txt/udhr_rus.txt +0 -220
  296. data/lib/data/udhr_txt/udhr_sag.txt +0 -220
  297. data/lib/data/udhr_txt/udhr_san.txt +0 -219
  298. data/lib/data/udhr_txt/udhr_sco.txt +0 -222
  299. data/lib/data/udhr_txt/udhr_shp.txt +0 -224
  300. data/lib/data/udhr_txt/udhr_skr.txt +0 -225
  301. data/lib/data/udhr_txt/udhr_slk.txt +0 -219
  302. data/lib/data/udhr_txt/udhr_slv.txt +0 -218
  303. data/lib/data/udhr_txt/udhr_sme.txt +0 -220
  304. data/lib/data/udhr_txt/udhr_smo.txt +0 -226
  305. data/lib/data/udhr_txt/udhr_sna.txt +0 -223
  306. data/lib/data/udhr_txt/udhr_snk.txt +0 -220
  307. data/lib/data/udhr_txt/udhr_som.txt +0 -216
  308. data/lib/data/udhr_txt/udhr_sot.txt +0 -220
  309. data/lib/data/udhr_txt/udhr_spa.txt +0 -220
  310. data/lib/data/udhr_txt/udhr_src.txt +0 -220
  311. data/lib/data/udhr_txt/udhr_srp_cyrl.txt +0 -218
  312. data/lib/data/udhr_txt/udhr_srp_latn.txt +0 -218
  313. data/lib/data/udhr_txt/udhr_srr.txt +0 -219
  314. data/lib/data/udhr_txt/udhr_ssw.txt +0 -228
  315. data/lib/data/udhr_txt/udhr_suk.txt +0 -218
  316. data/lib/data/udhr_txt/udhr_sun.txt +0 -227
  317. data/lib/data/udhr_txt/udhr_sus.txt +0 -218
  318. data/lib/data/udhr_txt/udhr_swe.txt +0 -224
  319. data/lib/data/udhr_txt/udhr_swh.txt +0 -221
  320. data/lib/data/udhr_txt/udhr_tah.txt +0 -217
  321. data/lib/data/udhr_txt/udhr_taj.txt +0 -10
  322. data/lib/data/udhr_txt/udhr_tam.txt +0 -227
  323. data/lib/data/udhr_txt/udhr_tat.txt +0 -219
  324. data/lib/data/udhr_txt/udhr_tbz.txt +0 -219
  325. data/lib/data/udhr_txt/udhr_tca.txt +0 -219
  326. data/lib/data/udhr_txt/udhr_tem.txt +0 -216
  327. data/lib/data/udhr_txt/udhr_tet.txt +0 -219
  328. data/lib/data/udhr_txt/udhr_tgk.txt +0 -217
  329. data/lib/data/udhr_txt/udhr_tgl.txt +0 -224
  330. data/lib/data/udhr_txt/udhr_tgl_tglg.txt +0 -9
  331. data/lib/data/udhr_txt/udhr_tha.txt +0 -217
  332. data/lib/data/udhr_txt/udhr_tir.txt +0 -217
  333. data/lib/data/udhr_txt/udhr_tiv.txt +0 -232
  334. data/lib/data/udhr_txt/udhr_tob.txt +0 -218
  335. data/lib/data/udhr_txt/udhr_toi.txt +0 -216
  336. data/lib/data/udhr_txt/udhr_toj.txt +0 -219
  337. data/lib/data/udhr_txt/udhr_ton.txt +0 -221
  338. data/lib/data/udhr_txt/udhr_top.txt +0 -220
  339. data/lib/data/udhr_txt/udhr_tpi.txt +0 -219
  340. data/lib/data/udhr_txt/udhr_tsn.txt +0 -219
  341. data/lib/data/udhr_txt/udhr_tso_MZ.txt +0 -220
  342. data/lib/data/udhr_txt/udhr_tsz.txt +0 -218
  343. data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +0 -216
  344. data/lib/data/udhr_txt/udhr_tuk_latn.txt +0 -221
  345. data/lib/data/udhr_txt/udhr_tur.txt +0 -219
  346. data/lib/data/udhr_txt/udhr_tzc.txt +0 -219
  347. data/lib/data/udhr_txt/udhr_tzh.txt +0 -218
  348. data/lib/data/udhr_txt/udhr_tzm.txt +0 -220
  349. data/lib/data/udhr_txt/udhr_tzm_tfng.txt +0 -9
  350. data/lib/data/udhr_txt/udhr_uig_arab.txt +0 -219
  351. data/lib/data/udhr_txt/udhr_uig_latn.txt +0 -219
  352. data/lib/data/udhr_txt/udhr_ukr.txt +0 -218
  353. data/lib/data/udhr_txt/udhr_umb.txt +0 -218
  354. data/lib/data/udhr_txt/udhr_ura.txt +0 -219
  355. data/lib/data/udhr_txt/udhr_urd.txt +0 -9
  356. data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +0 -220
  357. data/lib/data/udhr_txt/udhr_uzn_latn.txt +0 -220
  358. data/lib/data/udhr_txt/udhr_vai.txt +0 -224
  359. data/lib/data/udhr_txt/udhr_vie.txt +0 -221
  360. data/lib/data/udhr_txt/udhr_vmw.txt +0 -220
  361. data/lib/data/udhr_txt/udhr_war.txt +0 -219
  362. data/lib/data/udhr_txt/udhr_wln.txt +0 -220
  363. data/lib/data/udhr_txt/udhr_wol.txt +0 -219
  364. data/lib/data/udhr_txt/udhr_wwa.txt +0 -109
  365. data/lib/data/udhr_txt/udhr_xho.txt +0 -219
  366. data/lib/data/udhr_txt/udhr_xsm.txt +0 -219
  367. data/lib/data/udhr_txt/udhr_yad.txt +0 -220
  368. data/lib/data/udhr_txt/udhr_yao.txt +0 -214
  369. data/lib/data/udhr_txt/udhr_yap.txt +0 -220
  370. data/lib/data/udhr_txt/udhr_ydd.txt +0 -223
  371. data/lib/data/udhr_txt/udhr_ykg.txt +0 -211
  372. data/lib/data/udhr_txt/udhr_yor.txt +0 -218
  373. data/lib/data/udhr_txt/udhr_yua.txt +0 -218
  374. data/lib/data/udhr_txt/udhr_zam.txt +0 -223
  375. data/lib/data/udhr_txt/udhr_ztu.txt +0 -219
  376. data/lib/data/udhr_txt/udhr_zul.txt +0 -219
  377. data/test/train.rb +0 -26
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ lib/data/**
data/README.markdown CHANGED
@@ -1,18 +1,61 @@
1
1
  #babel
2
2
 
3
3
  Babel is a gem to identify in what language a text is written.
4
- It is based on the n-gram approach by Cacnar and Trenkle as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
4
+ It is based on the n-gram approach by Cavnar and Trenkle as described
5
+ in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
5
6
 
6
7
 
7
8
  ##usage
8
9
  require 'rubygems'
9
- require 'simplificator-babel'
10
-
11
- # Train babel: feed it some texts
12
- 'An english text to train and learn'.language= 'en'
13
- 'Ein deutscher Text'.language= 'de'
10
+ require 'babel'
11
+
12
+ def guess_language(s)
13
+ puts "'#{s}' is probably '#{s.language}'"
14
+ end
15
+ # load the default profiles
16
+ Babel.load_profiles
17
+
18
+ # Let's see what Babel thinks about these texts
19
+ guess_language 'Montags ist es ruhig'
20
+ guess_language 'le coq est mort'
14
21
 
15
- puts
22
+ # Replace a profile with my own profile
23
+ Babel.load_profile('eng', '/path/to/my/english/profile.yml')
24
+
25
+ # Merge profile data
26
+ Babel.load_profile('eng', '/path/to/my/other/english/profile.yml', :merge => true)
27
+
28
+ # Show Top-3 Languages for a sentence
29
+ puts "What language could this be written in?".languages[0..2]
30
+
31
+ ##profiles
32
+ Profiles are collections of n-grams and the number of occurrences of each n-gram.
33
+ Babel uses n-grams with length 2-5 (bigram, trigram, tetragram, pentagram).
34
+ You can create your own profile and decide what n-grams to use and whether
35
+ to limit the profile's size.
36
+
37
+ These profiles are shipped with the gem:
38
+ * german (deu) (this profile is built from udhr_deu_1996.txt)
39
+ * english (eng)
40
+ * french (fra)
41
+ * spanish (spa)
42
+ * italian (ita)
43
+
44
+ Want another profile built in? Send an email to info@simplificator.com and if there are enough
45
+ requests we add the profile.
46
+
47
+ The profiles that are shipped with babel are based on the texts found at
48
+ http://www.unicode.org/udhr/index_by_code.html
49
+
50
+ ##generating profiles
51
+ Profiles can be generated with the data found in http://www.unicode.org/udhr/assemblies/udhr_txt.zip or with any other text.
52
+ Once a profile is generated, Babel can store it in YAML format and load it again from YAML.
53
+
54
+ there is a rake task which simplifies profile generation:
55
+ rake babel:build_profile lang=foo file=myfile.txt dir=destination-directory
56
+
57
+ the file which is generated from this command can be loaded by
58
+ Babel.load_profile 'foo', 'profile_foo.yml'
16
59
 
17
60
  ##Copyright
18
61
 
data/Rakefile CHANGED
@@ -10,6 +10,9 @@ begin
10
10
  gem.homepage = "http://github.com/simplificator/babel"
11
11
  gem.authors = ["simplificator"]
12
12
  gem.add_dependency('ya2yaml', '>= 0.2.6')
13
+ gem.files.exclude 'lib/data'
14
+ #gem.files.exclude 'lib/data/*.xml'
15
+ gem.files.include 'lib/data/*.zip'
13
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14
17
  end
15
18
  rescue LoadError
@@ -53,4 +56,47 @@ Rake::RDocTask.new do |rdoc|
53
56
  rdoc.rdoc_files.include('README*')
54
57
  rdoc.rdoc_files.include('lib/**/*.rb')
55
58
  end
59
+ require 'rubygems'
60
+ require 'zip/zip'
61
+ require 'lib/babel'
62
+
63
+ namespace :babel do
64
+ task :unpack_data do
65
+ dir = File.join(File.dirname(__FILE__), 'lib', 'data')
66
+ file = File.join(dir, 'udhr_txt.zip')
67
+ Zip::ZipFile.open(file) do |zip|
68
+ zip.each do |entry|
69
+ destination = File.join(dir, entry.name)
70
+ FileUtils.mkdir_p(File.dirname(destination))
71
+ FileUtils.rm(destination) if File.exists?(destination)
72
+ zip.extract(entry, destination)
73
+ end
74
+ end
75
+ FileUtils.cp(File.join(dir, 'udhr_deu_1996.txt'), File.join(dir, 'udhr_deu.txt'))
76
+ end
77
+
78
+ task :build_profile do
79
+ if ENV['lang']
80
+ lang = ENV['lang']
81
+ file = ENV['file']
82
+ dir = ENV['dir'] || File.dirname(__FILE__)
83
+ skip = ENV['skip']
84
+ limit = ENV['limit']
85
+ unless file
86
+ skip ||= 5 # skip header in data files. english all the time
87
+ file = File.join(File.dirname(__FILE__), 'lib', 'data', "udhr_#{lang}.txt")
88
+ end
89
+ puts "Learning about #{lang} from #{file} and save it to #{dir}"
90
+ File.open(file, 'r') do |f|
91
+ f.each_with_index do |line, index|
92
+ if index > skip
93
+ Babel.learn(lang, line)
94
+ end
95
+ end
96
+ end
97
+ Babel.save_profile(lang, :dir => dir, :limit => limit)
98
+ end
99
+ end
100
+
101
+ end
56
102
 
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 4
2
+ :patch: 0
3
3
  :major: 0
4
- :minor: 0
4
+ :minor: 1
data/babel.gemspec ADDED
@@ -0,0 +1,64 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{babel}
5
+ s.version = "0.1.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["simplificator"]
9
+ s.date = %q{2009-07-13}
10
+ s.email = %q{info@simplificator.com}
11
+ s.extra_rdoc_files = [
12
+ "LICENSE",
13
+ "README.markdown"
14
+ ]
15
+ s.files = [
16
+ ".document",
17
+ ".gitignore",
18
+ "LICENSE",
19
+ "README.markdown",
20
+ "Rakefile",
21
+ "VERSION.yml",
22
+ "babel.gemspec",
23
+ "lib/babel.rb",
24
+ "lib/babel/babel.rb",
25
+ "lib/babel/profile.rb",
26
+ "lib/babel/string_extensions.rb",
27
+ "lib/profiles/profile_deu.yml",
28
+ "lib/profiles/profile_eng.yml",
29
+ "lib/profiles/profile_fra.yml",
30
+ "lib/profiles/profile_ita.yml",
31
+ "lib/profiles/profile_spa.yml",
32
+ "lib/profiles/udhr_txt.zip",
33
+ "samples/guessing.rb",
34
+ "test/babel_test.rb",
35
+ "test/profile_test.rb",
36
+ "test/string_extensions_test.rb",
37
+ "test/test_helper.rb"
38
+ ]
39
+ s.has_rdoc = true
40
+ s.homepage = %q{http://github.com/simplificator/babel}
41
+ s.rdoc_options = ["--charset=UTF-8"]
42
+ s.require_paths = ["lib"]
43
+ s.rubygems_version = %q{1.3.2}
44
+ s.summary = %q{Utility to guess the language of a text}
45
+ s.test_files = [
46
+ "test/babel_test.rb",
47
+ "test/profile_test.rb",
48
+ "test/string_extensions_test.rb",
49
+ "test/test_helper.rb"
50
+ ]
51
+
52
+ if s.respond_to? :specification_version then
53
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
+ s.specification_version = 3
55
+
56
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
57
+ s.add_runtime_dependency(%q<ya2yaml>, [">= 0.2.6"])
58
+ else
59
+ s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
63
+ end
64
+ end
data/lib/babel/babel.rb CHANGED
@@ -1,54 +1,115 @@
1
+ #
2
+ #
3
+ # Profile Generation:
4
+ # Whenever it's about generating a Profile (Babel.learn, Babel.distances and Babel.guess)
5
+ # you can pass
6
+ # * :min_length (2)
7
+ # * :max_length (5)
8
+ # * :pad (true)
9
+ # They are just forwarded to String.n_grams (default values in parentheses)
10
+ # It's highly recommended that you use the same settings for learning and guessing....
11
+
12
+
13
+
1
14
  module Babel
2
15
  @profiles = {}
3
16
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')
17
+
18
+ # Learn that a text is in a given language.
19
+ # Calls Profile.learn for the profile with the given language.
4
20
  def self.learn(lang, text, options = {})
5
21
  lang = lang.to_s
6
- profile = @profiles[lang] ||= Profile.new()
22
+ profile = @profiles[lang] ||= Profile.new(lang)
7
23
  profile.learn(text, options)
8
24
  end
9
25
 
10
-
26
+ # Clear all the profiles
11
27
  def self.clear_profiles
12
28
  @profiles = {}
13
29
  end
30
+ # find the profile for a language
31
+ def self.profile(lang)
32
+ @profiles[lang]
33
+ end
14
34
 
15
- def self.guess(source, options = {})
16
- found = nil
17
- Babel.distances(source).each do |entry|
18
- found = entry if found.nil? || entry.last < found.last
35
+ # register a profile
36
+ # pass :merge => true to merge into an existing profile
37
+ def self.register_profile(profile, options = {})
38
+ if options[:merge] && @profiles[profile.language]
39
+ @profiles[profile.language].merge(profile)
40
+ else
41
+ @profiles[profile.language] = profile
19
42
  end
20
- found.first if found
21
43
  end
22
44
 
23
- # An array of arrays of [language, distance] arrays
24
- def self.distances(text)
25
- source = Profile.new.learn(text)
26
- @profiles.map { |lang, target| [lang, source.distance(target)] }
45
+ # Guess the language of a text.
46
+ # As soon as there is at least one profile, this method always
47
+ # returns a value (perhaps the wrong one)...
48
+ # I.e. if only the "eng" profile is registered, then this method will always return "eng"
49
+ # no matter what text you pass
50
+ #
51
+ def self.guess(source, options = {})
52
+ distances = Babel.distances(source, options)
53
+ distances.first.first if distances.first
27
54
  end
28
-
29
- private
30
55
 
31
- def self.file_name(dir, lang)
32
- File.join(dir, "profile_#{lang}.yml")
56
+ # An array of arrays of [language, distance] arrays.
57
+ # The language with the shortest distance is the most probable solution.
58
+ # Sorted by distance, ascending (first item is most probable)
59
+ def self.distances(text, options = {})
60
+ source = Profile.new.learn(text, options)
61
+ @profiles.map { |lang, target| [lang, source.distance(target)] }.sort {|o1, o2| o1.last <=> o2.last}
33
62
  end
34
63
 
35
- # Load a specific profile ()
64
+
65
+ # Load all the profiles from a given directory.
66
+ # Loads all .yml files so be careful what directory you specify.
67
+ # options are:
68
+ # * :dir the directory, defaults to Babel::PROFILE_DIR
69
+ # See Babel.load_profile() for other options
36
70
  def self.load_profiles(options = {})
37
71
  dir = options[:directory] || PROFILE_DIR
38
72
  Dir[File.join(PROFILE_DIR, '*.yml')].each do |file|
39
- file =~ /profile_(.+)\.yml/
40
- @profiles[$1] = YAML.load_file(file)
73
+ Babel.load_profile(file, options)
41
74
  end
42
75
  end
43
76
 
77
+ # Load a single profile
78
+ # Options are:
79
+ # * :merge see Babel.register_profile for details
80
+ def self.load_profile(file, options = {})
81
+ Babel.register_profile(YAML.load_file(file), options)
82
+ end
83
+
84
+ # Save the profiles to a specified directory.
85
+ # See Babel.save_profile() for options
44
86
  def self.save_profiles(options = {})
45
- dir = options[:directory] || PROFILE_DIR
46
87
  @profiles.each do |lang, profile|
47
- profile.limit(options[:limit]) if options.has_key?(:limit)
48
- File.open(file_name(dir, lang), 'wb') do |file|
49
- file.write(profile.ya2yaml)
50
- end
88
+ Babel.save_profile(lang, options)
89
+ end
90
+ end
91
+
92
+ # Save a specific profile
93
+ # Options are:
94
+ # * :dir -> the directory to save the files to. Defaults to Babel::PROFILE_DIR
95
+ # * :limit -> Call limit() on the profile before save. This reduces the size of the profile
96
+ # for the cost of (possibly) less accurate language guessing
97
+ def self.save_profile(lang, options = {})
98
+ dir = options[:dir] || PROFILE_DIR
99
+ profile = Babel.profile(lang)
100
+ profile.limit(options[:limit]) if options[:limit]
101
+ File.open(file_name(dir, lang), 'wb') do |file|
102
+ file.write(profile.ya2yaml)
51
103
  end
104
+ end
105
+
106
+ private
107
+
108
+ # Build the file name for a profile file
109
+ # Naming scheme: profile_<LANG>.yml
110
+ def self.file_name(dir, lang)
111
+ File.join(dir, "profile_#{lang}.yml")
52
112
  end
113
+
53
114
  end
54
115
 
data/lib/babel/profile.rb CHANGED
@@ -1,43 +1,61 @@
1
1
  module Babel
2
2
  class Profile
3
- def initialize()
4
- @profile = {}
3
+ attr_reader :language
4
+ attr_reader :data
5
+ def initialize(language = nil)
6
+ @data = {}
5
7
  @total_occurences = 0
8
+ @language = language
6
9
  end
7
10
 
11
+
12
+ # learn a text
13
+ # following options are used when generating the n-grams:
14
+ # * min_length => 2
15
+ # * max_length => 5
16
+ # * pad => true
8
17
  def learn(text, options = {})
9
18
  options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
10
19
  text = clean(text)
11
20
  text.split(' ').each do |word|
12
- ngrams = word.ngrams(options)
13
- ngrams.each do |ngram|
21
+ word.n_grams(options).each do |ngram|
14
22
  self.occured(ngram)
15
23
  end
16
24
  end
25
+ # after learning rank the new n-grams
17
26
  self.rank
18
27
  self # return self so we can chain learn commands. profile.learn('asasas').learn('asdsad')
19
28
  end
20
29
 
21
30
 
31
+ def merge(other)
32
+ if self.language != other.language
33
+ raise ArgumentError.new("self has a language of #{self.language} but profile to merge has #{other.language}")
34
+ end
35
+ other.data.each do |key, value|
36
+ self.occured(key, value.first)
37
+ end
38
+ end
39
+
22
40
  # TODO: needed?
23
41
  def clean(text)
24
42
  return text
25
- text = text.gsub('?', '')
26
- text = text.gsub('.', '')
27
- text = text.gsub(';', '')
43
+ text = text.gsub(/[0-9]/, '')
28
44
  text = text.gsub(':', '')
45
+ text = text.gsub('/', '')
46
+ text = text.gsub('_', '')
29
47
  text = text.gsub('(', '')
30
48
  text = text.gsub(')', '')
31
- text = text.gsub('/', '')
32
- text = text.gsub(/[0-9]*/, '')
33
- text = text.gsub('+', '')
34
- text
49
+ text = text.gsub(';', '')
50
+ text = text.gsub('?', '')
51
+
52
+ return text
35
53
  end
54
+
36
55
  # limit this profile to n items
37
56
  # profile needs to be ranked first
38
- # do not use this if you plan to extend the profile later on
39
57
  def limit(boundary = 100)
40
- @profile.reject! do |key, value|
58
+ @data.reject! do |key, value|
41
59
  raise 'Please call rank() first' if value.last == 0
42
60
  boundary < value.last
43
61
  end
@@ -46,37 +64,37 @@ module Babel
46
64
  # rank the current profile
47
65
  # ngrams are sorted by occurence and then ranked
48
66
  def rank
49
- @profile.values.sort do |o1, o2|
50
- o2.first <=> o1.first
51
- end.each_with_index do |item, index|
52
- item[1] = index + 1
53
- end
67
+ #@data.values.sort do |o1, o2|
68
+ # o2.first <=> o1.first
69
+ #end.each_with_index do |item, index|
70
+ # item[1] = index + 1
71
+ #end
54
72
 
55
- @profile.values.each do |value|
73
+ @data.values.each do |value|
56
74
  value[1] = value[0] / @total_occurences.to_f
57
75
  end
58
76
  end
59
77
 
60
- # Called when a ngram is occured, optional you can pass an
78
+ # Called when an n-gram has occurred; optionally you can pass an
61
79
  # amount (how many times the ngram occured)
62
80
  def occured(ngram, amount = 1)
63
- (@profile[ngram] ||= [0, 0])[0] += amount
81
+ (@data[ngram] ||= [0, 0])[0] += amount
64
82
  @total_occurences += amount
65
83
  end
66
84
 
67
85
  # find the occurence of a ngram. if it never occured, returns 0
68
86
  def occurence(ngram)
69
- @profile[ngram] ? @profile[ngram].first : 0
87
+ @data[ngram] ? @data[ngram].first : 0
70
88
  end
71
89
 
72
90
  # find the ranking of a ngram. if it is not yet ranked, return 0
73
91
  def ranking(ngram)
74
- @profile[ngram] ? @profile[ngram].last : 0
92
+ @data[ngram] ? @data[ngram].last : 0
75
93
  end
76
94
 
77
95
  # Calculate the distance to another profile
78
96
  def distance(other)
79
- @profile.inject(0) do |memo, item|
97
+ @data.inject(0) do |memo, item|
80
98
  other_ranking = other.ranking(item.first)
81
99
  if other_ranking == 0
82
100
  memo += 1
@@ -88,7 +106,7 @@ module Babel
88
106
 
89
107
 
90
108
  def to_s
91
- @profile.inspect
109
+ @data.inspect
92
110
  end
93
111
  end
94
112
  end
@@ -1,15 +1,19 @@
1
1
  class String
2
-
3
- # TODO: recursive?
4
- def ngrams(options = {})
2
+ # Generate n-grams for a string.
3
+ # options are:
4
+ # :min_length : minimum length of the n-grams (defaults to 1)
5
+ # :max_length : maximum length of the n-grams (defaults to self.length)
6
+ # :pad : pad with '_' to generate all possible n-grams (defaults to false)
7
+ def n_grams(options = {})
8
+ # TODO: recursive?
9
+ # TODO: use min/max length for loop index instead of looping
10
+ # all and then use if test to decide if to add or not
5
11
  min_length = options[:min_length] || 1
6
12
  max_length = options[:max_length] || self.length
7
13
  pad = options[:pad] || false
8
14
  value = options[:preserve_case] ? self : self.downcase
9
15
  value = "_#{value}#{'_' * (value.length - 1)}" if pad
10
16
  res = []
11
- # TODO: use min/max length for loop index instead of looping
12
- # all and then use if test to decide if to add or not
13
17
  0.upto(value.length - 1) do |index|
14
18
  index.upto(value.length - 1) do |len|
15
19
  if value[index..len].length >= min_length && value[index..len].length <= max_length
@@ -20,22 +24,23 @@ class String
20
24
  res
21
25
  end
22
26
 
23
- # def byte_grams(options = {})
24
- # min_length = options[:min_length] || 1
25
- # max_length = options[:max_length] || self.length
26
- # value = options[:preserve_case] ? self : self.downcase
27
- # res = []
28
- #
29
- # end
30
-
31
-
32
- # Ask Babel about the language of this text
33
- # Can return nil if no language found
27
+ # Ask Babel about the language of this text.
28
+ # Convenience method, just calls Babel.guess().
29
+ # See Babel.guess for description.
34
30
  def language(options = {})
35
31
  Babel.guess(self, options)
36
32
  end
33
+ # Ask Babel about the languages this text could be.
34
+ # It will return all the registered languages with the most probable
35
+ # Language first. You might want to restrict this before presenting to
36
+ # the user.
37
+ def languages(options = {})
38
+ Babel.distances(self, options).map() {|item| item.first}
39
+ end
37
40
 
38
- # Tell Babel that this text is in a given language
41
+ # Tell Babel that this text is in a given language.
42
+ # Convenience method, just calls Babel.learn().
43
+ # See Babel.learn for description
39
44
  def language=(lang, options = {})
40
45
  Babel.learn(lang, self, options)
41
46
  end
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  (i:
4
4
  - 1
5
5
  - 1.48557506610809e-05
@@ -25359,4 +25359,5 @@
25359
25359
  ützu:
25360
25360
  - 1
25361
25361
  - 1.48557506610809e-05
25362
+ language: deu
25362
25363
  total_occurences: 67314
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  ? ",_"
4
4
  :
5
5
  - 94
@@ -20791,4 +20791,5 @@
20791
20791
  ‐se:
20792
20792
  - 1
20793
20793
  - 1.85742412422453e-05
20794
+ language: eng
20794
20795
  total_occurences: 53838
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  ? ",_"
4
4
  :
5
5
  - 118
@@ -24961,4 +24961,5 @@
24961
24961
  ’é:
24962
24962
  - 8
24963
24963
  - 0.000123525415354209
24964
+ language: fra
24964
24965
  total_occurences: 64764