simplificator-babel 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (377) hide show
  1. data/.document +5 -0
  2. data/.gitignore +6 -0
  3. data/README.markdown +50 -7
  4. data/Rakefile +46 -0
  5. data/VERSION.yml +2 -2
  6. data/babel.gemspec +64 -0
  7. data/lib/babel/babel.rb +84 -23
  8. data/lib/babel/profile.rb +43 -25
  9. data/lib/babel/string_extensions.rb +22 -17
  10. data/lib/profiles/{profile_deu_1996.yml → profile_deu.yml} +2 -1
  11. data/lib/profiles/profile_eng.yml +2 -1
  12. data/lib/profiles/profile_fra.yml +2 -1
  13. data/lib/profiles/profile_ita.yml +22432 -0
  14. data/lib/profiles/profile_spa.yml +2 -1
  15. data/lib/profiles/udhr_txt.zip +0 -0
  16. data/samples/guessing.rb +28 -0
  17. data/test/babel_test.rb +3 -34
  18. data/test/string_extensions_test.rb +10 -8
  19. metadata +8 -361
  20. data/lib/data/udhr_txt/index.xml +0 -385
  21. data/lib/data/udhr_txt/udhr_007.txt +0 -220
  22. data/lib/data/udhr_txt/udhr_008.txt +0 -220
  23. data/lib/data/udhr_txt/udhr_009.txt +0 -228
  24. data/lib/data/udhr_txt/udhr_010.txt +0 -219
  25. data/lib/data/udhr_txt/udhr_011.txt +0 -232
  26. data/lib/data/udhr_txt/udhr_abk.txt +0 -218
  27. data/lib/data/udhr_txt/udhr_ace.txt +0 -221
  28. data/lib/data/udhr_txt/udhr_acu.txt +0 -222
  29. data/lib/data/udhr_txt/udhr_ada.txt +0 -220
  30. data/lib/data/udhr_txt/udhr_afr.txt +0 -219
  31. data/lib/data/udhr_txt/udhr_agr.txt +0 -219
  32. data/lib/data/udhr_txt/udhr_aii.txt +0 -216
  33. data/lib/data/udhr_txt/udhr_ajg.txt +0 -219
  34. data/lib/data/udhr_txt/udhr_aka_akuapem.txt +0 -221
  35. data/lib/data/udhr_txt/udhr_aka_asante.txt +0 -220
  36. data/lib/data/udhr_txt/udhr_aka_fante.txt +0 -219
  37. data/lib/data/udhr_txt/udhr_als.txt +0 -220
  38. data/lib/data/udhr_txt/udhr_amc.txt +0 -215
  39. data/lib/data/udhr_txt/udhr_ame.txt +0 -222
  40. data/lib/data/udhr_txt/udhr_amh.txt +0 -209
  41. data/lib/data/udhr_txt/udhr_amr.txt +0 -221
  42. data/lib/data/udhr_txt/udhr_arb.txt +0 -220
  43. data/lib/data/udhr_txt/udhr_arl.txt +0 -222
  44. data/lib/data/udhr_txt/udhr_arn.txt +0 -218
  45. data/lib/data/udhr_txt/udhr_ast.txt +0 -221
  46. data/lib/data/udhr_txt/udhr_auv.txt +0 -217
  47. data/lib/data/udhr_txt/udhr_ayr.txt +0 -218
  48. data/lib/data/udhr_txt/udhr_azj_cyrl.txt +0 -218
  49. data/lib/data/udhr_txt/udhr_azj_latn.txt +0 -218
  50. data/lib/data/udhr_txt/udhr_bam.txt +0 -218
  51. data/lib/data/udhr_txt/udhr_ban.txt +0 -222
  52. data/lib/data/udhr_txt/udhr_bba.txt +0 -218
  53. data/lib/data/udhr_txt/udhr_bci.txt +0 -217
  54. data/lib/data/udhr_txt/udhr_bcl.txt +0 -219
  55. data/lib/data/udhr_txt/udhr_bel.txt +0 -221
  56. data/lib/data/udhr_txt/udhr_bem.txt +0 -217
  57. data/lib/data/udhr_txt/udhr_ben.txt +0 -222
  58. data/lib/data/udhr_txt/udhr_bho.txt +0 -219
  59. data/lib/data/udhr_txt/udhr_bin.txt +0 -232
  60. data/lib/data/udhr_txt/udhr_bis.txt +0 -218
  61. data/lib/data/udhr_txt/udhr_blu.txt +0 -219
  62. data/lib/data/udhr_txt/udhr_boa.txt +0 -223
  63. data/lib/data/udhr_txt/udhr_bod.txt +0 -221
  64. data/lib/data/udhr_txt/udhr_bos_cyrl.txt +0 -220
  65. data/lib/data/udhr_txt/udhr_bos_latn.txt +0 -220
  66. data/lib/data/udhr_txt/udhr_bre.txt +0 -222
  67. data/lib/data/udhr_txt/udhr_btb.txt +0 -217
  68. data/lib/data/udhr_txt/udhr_bug.txt +0 -222
  69. data/lib/data/udhr_txt/udhr_bul.txt +0 -218
  70. data/lib/data/udhr_txt/udhr_cab.txt +0 -221
  71. data/lib/data/udhr_txt/udhr_cak.txt +0 -217
  72. data/lib/data/udhr_txt/udhr_cat.txt +0 -220
  73. data/lib/data/udhr_txt/udhr_cbr.txt +0 -219
  74. data/lib/data/udhr_txt/udhr_cbs.txt +0 -153
  75. data/lib/data/udhr_txt/udhr_cbt.txt +0 -220
  76. data/lib/data/udhr_txt/udhr_cbu.txt +0 -218
  77. data/lib/data/udhr_txt/udhr_ccx.txt +0 -222
  78. data/lib/data/udhr_txt/udhr_ceb.txt +0 -218
  79. data/lib/data/udhr_txt/udhr_ces.txt +0 -221
  80. data/lib/data/udhr_txt/udhr_cha.txt +0 -219
  81. data/lib/data/udhr_txt/udhr_chj.txt +0 -220
  82. data/lib/data/udhr_txt/udhr_chk.txt +0 -220
  83. data/lib/data/udhr_txt/udhr_chr.txt +0 -10
  84. data/lib/data/udhr_txt/udhr_cic.txt +0 -220
  85. data/lib/data/udhr_txt/udhr_cjk.txt +0 -218
  86. data/lib/data/udhr_txt/udhr_cjk_AO.txt +0 -220
  87. data/lib/data/udhr_txt/udhr_ckb.txt +0 -217
  88. data/lib/data/udhr_txt/udhr_cmn_hans.txt +0 -220
  89. data/lib/data/udhr_txt/udhr_cmn_hant.txt +0 -220
  90. data/lib/data/udhr_txt/udhr_cnh.txt +0 -220
  91. data/lib/data/udhr_txt/udhr_cni.txt +0 -220
  92. data/lib/data/udhr_txt/udhr_cos.txt +0 -218
  93. data/lib/data/udhr_txt/udhr_cot.txt +0 -222
  94. data/lib/data/udhr_txt/udhr_cpu.txt +0 -219
  95. data/lib/data/udhr_txt/udhr_crs.txt +0 -217
  96. data/lib/data/udhr_txt/udhr_csa.txt +0 -223
  97. data/lib/data/udhr_txt/udhr_csw.txt +0 -163
  98. data/lib/data/udhr_txt/udhr_ctd.txt +0 -222
  99. data/lib/data/udhr_txt/udhr_cym.txt +0 -222
  100. data/lib/data/udhr_txt/udhr_dag.txt +0 -217
  101. data/lib/data/udhr_txt/udhr_dan.txt +0 -224
  102. data/lib/data/udhr_txt/udhr_ddn.txt +0 -217
  103. data/lib/data/udhr_txt/udhr_deu_1901.txt +0 -220
  104. data/lib/data/udhr_txt/udhr_deu_1996.txt +0 -220
  105. data/lib/data/udhr_txt/udhr_dga.txt +0 -220
  106. data/lib/data/udhr_txt/udhr_dip.txt +0 -217
  107. data/lib/data/udhr_txt/udhr_div.txt +0 -220
  108. data/lib/data/udhr_txt/udhr_dyo.txt +0 -217
  109. data/lib/data/udhr_txt/udhr_dzo.txt +0 -9
  110. data/lib/data/udhr_txt/udhr_ell_monotonic.txt +0 -220
  111. data/lib/data/udhr_txt/udhr_ell_polytonic.txt +0 -220
  112. data/lib/data/udhr_txt/udhr_emk.txt +0 -218
  113. data/lib/data/udhr_txt/udhr_eml.txt +0 -219
  114. data/lib/data/udhr_txt/udhr_eng.txt +0 -219
  115. data/lib/data/udhr_txt/udhr_epo.txt +0 -221
  116. data/lib/data/udhr_txt/udhr_est.txt +0 -219
  117. data/lib/data/udhr_txt/udhr_eus.txt +0 -220
  118. data/lib/data/udhr_txt/udhr_eve.txt +0 -207
  119. data/lib/data/udhr_txt/udhr_ewe.txt +0 -218
  120. data/lib/data/udhr_txt/udhr_fao.txt +0 -219
  121. data/lib/data/udhr_txt/udhr_fij.txt +0 -224
  122. data/lib/data/udhr_txt/udhr_fin.txt +0 -224
  123. data/lib/data/udhr_txt/udhr_flm.txt +0 -219
  124. data/lib/data/udhr_txt/udhr_fon.txt +0 -217
  125. data/lib/data/udhr_txt/udhr_fra.txt +0 -218
  126. data/lib/data/udhr_txt/udhr_fri.txt +0 -219
  127. data/lib/data/udhr_txt/udhr_fuc.txt +0 -217
  128. data/lib/data/udhr_txt/udhr_fur.txt +0 -220
  129. data/lib/data/udhr_txt/udhr_gaa.txt +0 -220
  130. data/lib/data/udhr_txt/udhr_gag.txt +0 -223
  131. data/lib/data/udhr_txt/udhr_gax.txt +0 -222
  132. data/lib/data/udhr_txt/udhr_gjn.txt +0 -220
  133. data/lib/data/udhr_txt/udhr_gkp.txt +0 -216
  134. data/lib/data/udhr_txt/udhr_gla.txt +0 -229
  135. data/lib/data/udhr_txt/udhr_gle.txt +0 -215
  136. data/lib/data/udhr_txt/udhr_glg.txt +0 -217
  137. data/lib/data/udhr_txt/udhr_guc.txt +0 -221
  138. data/lib/data/udhr_txt/udhr_gug.txt +0 -210
  139. data/lib/data/udhr_txt/udhr_guj.txt +0 -219
  140. data/lib/data/udhr_txt/udhr_gyr.txt +0 -203
  141. data/lib/data/udhr_txt/udhr_hat_kreyol.txt +0 -221
  142. data/lib/data/udhr_txt/udhr_hat_popular.txt +0 -221
  143. data/lib/data/udhr_txt/udhr_hau_NE.txt +0 -219
  144. data/lib/data/udhr_txt/udhr_hau_NG.txt +0 -219
  145. data/lib/data/udhr_txt/udhr_haw.txt +0 -219
  146. data/lib/data/udhr_txt/udhr_hea.txt +0 -219
  147. data/lib/data/udhr_txt/udhr_heb.txt +0 -216
  148. data/lib/data/udhr_txt/udhr_hil.txt +0 -217
  149. data/lib/data/udhr_txt/udhr_hin.txt +0 -222
  150. data/lib/data/udhr_txt/udhr_hms.txt +0 -219
  151. data/lib/data/udhr_txt/udhr_hna.txt +0 -217
  152. data/lib/data/udhr_txt/udhr_hni.txt +0 -218
  153. data/lib/data/udhr_txt/udhr_hrv.txt +0 -218
  154. data/lib/data/udhr_txt/udhr_hsb.txt +0 -220
  155. data/lib/data/udhr_txt/udhr_hun.txt +0 -218
  156. data/lib/data/udhr_txt/udhr_hus.txt +0 -222
  157. data/lib/data/udhr_txt/udhr_huu.txt +0 -220
  158. data/lib/data/udhr_txt/udhr_hva.txt +0 -220
  159. data/lib/data/udhr_txt/udhr_hye.txt +0 -234
  160. data/lib/data/udhr_txt/udhr_ibb.txt +0 -235
  161. data/lib/data/udhr_txt/udhr_ibo.txt +0 -219
  162. data/lib/data/udhr_txt/udhr_ido.txt +0 -224
  163. data/lib/data/udhr_txt/udhr_iii.txt +0 -9
  164. data/lib/data/udhr_txt/udhr_ike.txt +0 -163
  165. data/lib/data/udhr_txt/udhr_ilo.txt +0 -217
  166. data/lib/data/udhr_txt/udhr_ina.txt +0 -220
  167. data/lib/data/udhr_txt/udhr_ind.txt +0 -219
  168. data/lib/data/udhr_txt/udhr_isl.txt +0 -217
  169. data/lib/data/udhr_txt/udhr_ita.txt +0 -221
  170. data/lib/data/udhr_txt/udhr_jav.txt +0 -222
  171. data/lib/data/udhr_txt/udhr_jpn.txt +0 -219
  172. data/lib/data/udhr_txt/udhr_kal.txt +0 -218
  173. data/lib/data/udhr_txt/udhr_kan.txt +0 -216
  174. data/lib/data/udhr_txt/udhr_kat.txt +0 -221
  175. data/lib/data/udhr_txt/udhr_kaz.txt +0 -218
  176. data/lib/data/udhr_txt/udhr_kbp.txt +0 -218
  177. data/lib/data/udhr_txt/udhr_kde.txt +0 -212
  178. data/lib/data/udhr_txt/udhr_kea.txt +0 -219
  179. data/lib/data/udhr_txt/udhr_kek.txt +0 -219
  180. data/lib/data/udhr_txt/udhr_khk.txt +0 -217
  181. data/lib/data/udhr_txt/udhr_khk_mong.txt +0 -11
  182. data/lib/data/udhr_txt/udhr_khm.txt +0 -220
  183. data/lib/data/udhr_txt/udhr_kin.txt +0 -220
  184. data/lib/data/udhr_txt/udhr_kir.txt +0 -220
  185. data/lib/data/udhr_txt/udhr_kmb.txt +0 -219
  186. data/lib/data/udhr_txt/udhr_knc.txt +0 -230
  187. data/lib/data/udhr_txt/udhr_kng.txt +0 -219
  188. data/lib/data/udhr_txt/udhr_kng_AO.txt +0 -219
  189. data/lib/data/udhr_txt/udhr_koo.txt +0 -216
  190. data/lib/data/udhr_txt/udhr_kor.txt +0 -219
  191. data/lib/data/udhr_txt/udhr_kqn.txt +0 -218
  192. data/lib/data/udhr_txt/udhr_kri.txt +0 -226
  193. data/lib/data/udhr_txt/udhr_ktu.txt +0 -219
  194. data/lib/data/udhr_txt/udhr_lao.txt +0 -223
  195. data/lib/data/udhr_txt/udhr_lat.txt +0 -221
  196. data/lib/data/udhr_txt/udhr_lat_1.txt +0 -220
  197. data/lib/data/udhr_txt/udhr_lav.txt +0 -220
  198. data/lib/data/udhr_txt/udhr_lia.txt +0 -218
  199. data/lib/data/udhr_txt/udhr_lin.txt +0 -217
  200. data/lib/data/udhr_txt/udhr_lin_tones.txt +0 -214
  201. data/lib/data/udhr_txt/udhr_lit.txt +0 -218
  202. data/lib/data/udhr_txt/udhr_lnc.txt +0 -219
  203. data/lib/data/udhr_txt/udhr_lns.txt +0 -219
  204. data/lib/data/udhr_txt/udhr_loz.txt +0 -219
  205. data/lib/data/udhr_txt/udhr_ltz.txt +0 -218
  206. data/lib/data/udhr_txt/udhr_lua.txt +0 -219
  207. data/lib/data/udhr_txt/udhr_lue.txt +0 -217
  208. data/lib/data/udhr_txt/udhr_lug.txt +0 -216
  209. data/lib/data/udhr_txt/udhr_lun.txt +0 -216
  210. data/lib/data/udhr_txt/udhr_mad.txt +0 -223
  211. data/lib/data/udhr_txt/udhr_mag.txt +0 -220
  212. data/lib/data/udhr_txt/udhr_mah.txt +0 -220
  213. data/lib/data/udhr_txt/udhr_mai.txt +0 -223
  214. data/lib/data/udhr_txt/udhr_mal.txt +0 -210
  215. data/lib/data/udhr_txt/udhr_mam.txt +0 -218
  216. data/lib/data/udhr_txt/udhr_mar.txt +0 -219
  217. data/lib/data/udhr_txt/udhr_maz.txt +0 -218
  218. data/lib/data/udhr_txt/udhr_mcd.txt +0 -220
  219. data/lib/data/udhr_txt/udhr_mcf.txt +0 -223
  220. data/lib/data/udhr_txt/udhr_men.txt +0 -222
  221. data/lib/data/udhr_txt/udhr_mic.txt +0 -218
  222. data/lib/data/udhr_txt/udhr_min.txt +0 -221
  223. data/lib/data/udhr_txt/udhr_miq.txt +0 -213
  224. data/lib/data/udhr_txt/udhr_mkd.txt +0 -221
  225. data/lib/data/udhr_txt/udhr_mlt.txt +0 -217
  226. data/lib/data/udhr_txt/udhr_mly_arab.txt +0 -219
  227. data/lib/data/udhr_txt/udhr_mly_latn.txt +0 -218
  228. data/lib/data/udhr_txt/udhr_mos.txt +0 -216
  229. data/lib/data/udhr_txt/udhr_mri.txt +0 -219
  230. data/lib/data/udhr_txt/udhr_mxi.txt +0 -218
  231. data/lib/data/udhr_txt/udhr_mxv.txt +0 -223
  232. data/lib/data/udhr_txt/udhr_mya.txt +0 -219
  233. data/lib/data/udhr_txt/udhr_mzi.txt +0 -227
  234. data/lib/data/udhr_txt/udhr_nav.txt +0 -219
  235. data/lib/data/udhr_txt/udhr_nba.txt +0 -257
  236. data/lib/data/udhr_txt/udhr_nbl.txt +0 -218
  237. data/lib/data/udhr_txt/udhr_ndo.txt +0 -217
  238. data/lib/data/udhr_txt/udhr_nep.txt +0 -214
  239. data/lib/data/udhr_txt/udhr_nhn.txt +0 -221
  240. data/lib/data/udhr_txt/udhr_nld.txt +0 -217
  241. data/lib/data/udhr_txt/udhr_nno.txt +0 -219
  242. data/lib/data/udhr_txt/udhr_nob.txt +0 -225
  243. data/lib/data/udhr_txt/udhr_not.txt +0 -218
  244. data/lib/data/udhr_txt/udhr_nso.txt +0 -219
  245. data/lib/data/udhr_txt/udhr_nya_chechewa.txt +0 -221
  246. data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +0 -218
  247. data/lib/data/udhr_txt/udhr_nym.txt +0 -229
  248. data/lib/data/udhr_txt/udhr_nyn.txt +0 -213
  249. data/lib/data/udhr_txt/udhr_nzi.txt +0 -221
  250. data/lib/data/udhr_txt/udhr_ojb.txt +0 -221
  251. data/lib/data/udhr_txt/udhr_oss.txt +0 -214
  252. data/lib/data/udhr_txt/udhr_ote.txt +0 -218
  253. data/lib/data/udhr_txt/udhr_pam.txt +0 -225
  254. data/lib/data/udhr_txt/udhr_pan.txt +0 -227
  255. data/lib/data/udhr_txt/udhr_pau.txt +0 -219
  256. data/lib/data/udhr_txt/udhr_pbb.txt +0 -218
  257. data/lib/data/udhr_txt/udhr_pbu.txt +0 -9
  258. data/lib/data/udhr_txt/udhr_pcd.txt +0 -218
  259. data/lib/data/udhr_txt/udhr_pcm.txt +0 -218
  260. data/lib/data/udhr_txt/udhr_pes_1.txt +0 -218
  261. data/lib/data/udhr_txt/udhr_pes_2.txt +0 -222
  262. data/lib/data/udhr_txt/udhr_pis.txt +0 -219
  263. data/lib/data/udhr_txt/udhr_plt.txt +0 -214
  264. data/lib/data/udhr_txt/udhr_pnb.txt +0 -223
  265. data/lib/data/udhr_txt/udhr_pol.txt +0 -220
  266. data/lib/data/udhr_txt/udhr_pon.txt +0 -218
  267. data/lib/data/udhr_txt/udhr_por_BR.txt +0 -231
  268. data/lib/data/udhr_txt/udhr_por_PT.txt +0 -219
  269. data/lib/data/udhr_txt/udhr_pov.txt +0 -220
  270. data/lib/data/udhr_txt/udhr_ppl.txt +0 -219
  271. data/lib/data/udhr_txt/udhr_prq.txt +0 -151
  272. data/lib/data/udhr_txt/udhr_prv.txt +0 -207
  273. data/lib/data/udhr_txt/udhr_quc.txt +0 -217
  274. data/lib/data/udhr_txt/udhr_qud.txt +0 -218
  275. data/lib/data/udhr_txt/udhr_quy.txt +0 -221
  276. data/lib/data/udhr_txt/udhr_quz.txt +0 -223
  277. data/lib/data/udhr_txt/udhr_qva.txt +0 -219
  278. data/lib/data/udhr_txt/udhr_qvc.txt +0 -218
  279. data/lib/data/udhr_txt/udhr_qvh.txt +0 -217
  280. data/lib/data/udhr_txt/udhr_qvm.txt +0 -219
  281. data/lib/data/udhr_txt/udhr_qvn.txt +0 -217
  282. data/lib/data/udhr_txt/udhr_qwh.txt +0 -218
  283. data/lib/data/udhr_txt/udhr_qxa.txt +0 -217
  284. data/lib/data/udhr_txt/udhr_qxn.txt +0 -216
  285. data/lib/data/udhr_txt/udhr_qxu.txt +0 -221
  286. data/lib/data/udhr_txt/udhr_rar.txt +0 -220
  287. data/lib/data/udhr_txt/udhr_rmn.txt +0 -220
  288. data/lib/data/udhr_txt/udhr_rmn_1.txt +0 -221
  289. data/lib/data/udhr_txt/udhr_rmy.txt +0 -218
  290. data/lib/data/udhr_txt/udhr_roh.txt +0 -217
  291. data/lib/data/udhr_txt/udhr_ron_1953.txt +0 -218
  292. data/lib/data/udhr_txt/udhr_ron_1993.txt +0 -218
  293. data/lib/data/udhr_txt/udhr_ron_2006.txt +0 -218
  294. data/lib/data/udhr_txt/udhr_run.txt +0 -218
  295. data/lib/data/udhr_txt/udhr_rus.txt +0 -220
  296. data/lib/data/udhr_txt/udhr_sag.txt +0 -220
  297. data/lib/data/udhr_txt/udhr_san.txt +0 -219
  298. data/lib/data/udhr_txt/udhr_sco.txt +0 -222
  299. data/lib/data/udhr_txt/udhr_shp.txt +0 -224
  300. data/lib/data/udhr_txt/udhr_skr.txt +0 -225
  301. data/lib/data/udhr_txt/udhr_slk.txt +0 -219
  302. data/lib/data/udhr_txt/udhr_slv.txt +0 -218
  303. data/lib/data/udhr_txt/udhr_sme.txt +0 -220
  304. data/lib/data/udhr_txt/udhr_smo.txt +0 -226
  305. data/lib/data/udhr_txt/udhr_sna.txt +0 -223
  306. data/lib/data/udhr_txt/udhr_snk.txt +0 -220
  307. data/lib/data/udhr_txt/udhr_som.txt +0 -216
  308. data/lib/data/udhr_txt/udhr_sot.txt +0 -220
  309. data/lib/data/udhr_txt/udhr_spa.txt +0 -220
  310. data/lib/data/udhr_txt/udhr_src.txt +0 -220
  311. data/lib/data/udhr_txt/udhr_srp_cyrl.txt +0 -218
  312. data/lib/data/udhr_txt/udhr_srp_latn.txt +0 -218
  313. data/lib/data/udhr_txt/udhr_srr.txt +0 -219
  314. data/lib/data/udhr_txt/udhr_ssw.txt +0 -228
  315. data/lib/data/udhr_txt/udhr_suk.txt +0 -218
  316. data/lib/data/udhr_txt/udhr_sun.txt +0 -227
  317. data/lib/data/udhr_txt/udhr_sus.txt +0 -218
  318. data/lib/data/udhr_txt/udhr_swe.txt +0 -224
  319. data/lib/data/udhr_txt/udhr_swh.txt +0 -221
  320. data/lib/data/udhr_txt/udhr_tah.txt +0 -217
  321. data/lib/data/udhr_txt/udhr_taj.txt +0 -10
  322. data/lib/data/udhr_txt/udhr_tam.txt +0 -227
  323. data/lib/data/udhr_txt/udhr_tat.txt +0 -219
  324. data/lib/data/udhr_txt/udhr_tbz.txt +0 -219
  325. data/lib/data/udhr_txt/udhr_tca.txt +0 -219
  326. data/lib/data/udhr_txt/udhr_tem.txt +0 -216
  327. data/lib/data/udhr_txt/udhr_tet.txt +0 -219
  328. data/lib/data/udhr_txt/udhr_tgk.txt +0 -217
  329. data/lib/data/udhr_txt/udhr_tgl.txt +0 -224
  330. data/lib/data/udhr_txt/udhr_tgl_tglg.txt +0 -9
  331. data/lib/data/udhr_txt/udhr_tha.txt +0 -217
  332. data/lib/data/udhr_txt/udhr_tir.txt +0 -217
  333. data/lib/data/udhr_txt/udhr_tiv.txt +0 -232
  334. data/lib/data/udhr_txt/udhr_tob.txt +0 -218
  335. data/lib/data/udhr_txt/udhr_toi.txt +0 -216
  336. data/lib/data/udhr_txt/udhr_toj.txt +0 -219
  337. data/lib/data/udhr_txt/udhr_ton.txt +0 -221
  338. data/lib/data/udhr_txt/udhr_top.txt +0 -220
  339. data/lib/data/udhr_txt/udhr_tpi.txt +0 -219
  340. data/lib/data/udhr_txt/udhr_tsn.txt +0 -219
  341. data/lib/data/udhr_txt/udhr_tso_MZ.txt +0 -220
  342. data/lib/data/udhr_txt/udhr_tsz.txt +0 -218
  343. data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +0 -216
  344. data/lib/data/udhr_txt/udhr_tuk_latn.txt +0 -221
  345. data/lib/data/udhr_txt/udhr_tur.txt +0 -219
  346. data/lib/data/udhr_txt/udhr_tzc.txt +0 -219
  347. data/lib/data/udhr_txt/udhr_tzh.txt +0 -218
  348. data/lib/data/udhr_txt/udhr_tzm.txt +0 -220
  349. data/lib/data/udhr_txt/udhr_tzm_tfng.txt +0 -9
  350. data/lib/data/udhr_txt/udhr_uig_arab.txt +0 -219
  351. data/lib/data/udhr_txt/udhr_uig_latn.txt +0 -219
  352. data/lib/data/udhr_txt/udhr_ukr.txt +0 -218
  353. data/lib/data/udhr_txt/udhr_umb.txt +0 -218
  354. data/lib/data/udhr_txt/udhr_ura.txt +0 -219
  355. data/lib/data/udhr_txt/udhr_urd.txt +0 -9
  356. data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +0 -220
  357. data/lib/data/udhr_txt/udhr_uzn_latn.txt +0 -220
  358. data/lib/data/udhr_txt/udhr_vai.txt +0 -224
  359. data/lib/data/udhr_txt/udhr_vie.txt +0 -221
  360. data/lib/data/udhr_txt/udhr_vmw.txt +0 -220
  361. data/lib/data/udhr_txt/udhr_war.txt +0 -219
  362. data/lib/data/udhr_txt/udhr_wln.txt +0 -220
  363. data/lib/data/udhr_txt/udhr_wol.txt +0 -219
  364. data/lib/data/udhr_txt/udhr_wwa.txt +0 -109
  365. data/lib/data/udhr_txt/udhr_xho.txt +0 -219
  366. data/lib/data/udhr_txt/udhr_xsm.txt +0 -219
  367. data/lib/data/udhr_txt/udhr_yad.txt +0 -220
  368. data/lib/data/udhr_txt/udhr_yao.txt +0 -214
  369. data/lib/data/udhr_txt/udhr_yap.txt +0 -220
  370. data/lib/data/udhr_txt/udhr_ydd.txt +0 -223
  371. data/lib/data/udhr_txt/udhr_ykg.txt +0 -211
  372. data/lib/data/udhr_txt/udhr_yor.txt +0 -218
  373. data/lib/data/udhr_txt/udhr_yua.txt +0 -218
  374. data/lib/data/udhr_txt/udhr_zam.txt +0 -223
  375. data/lib/data/udhr_txt/udhr_ztu.txt +0 -219
  376. data/lib/data/udhr_txt/udhr_zul.txt +0 -219
  377. data/test/train.rb +0 -26
data/.document ADDED
@@ -0,0 +1,5 @@
1
+ README.rdoc
2
+ lib/**/*.rb
3
+ bin/*
4
+ features/**/*.feature
5
+ LICENSE
data/.gitignore ADDED
@@ -0,0 +1,6 @@
1
+ *.sw?
2
+ .DS_Store
3
+ coverage
4
+ rdoc
5
+ pkg
6
+ lib/data/**
data/README.markdown CHANGED
@@ -1,18 +1,61 @@
1
1
  #babel
2
2
 
3
3
  Babel is a gem to identify in what language a text is written.
4
- It is based on the n-gram approach by Cacnar and Trenkle as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
4
+ It is based on the n-gram approach by Cavnar and Trenkle as described
5
+ in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
5
6
 
6
7
 
7
8
  ##usage
8
9
  require 'rubygems'
9
- require 'simplificator-babel'
10
-
11
- # Train babel: feed it some texts
12
- 'An english text to train and learn'.language= 'en'
13
- 'Ein deutscher Text'.language= 'de'
10
+ require 'babel'
11
+
12
+ def guess_language(s)
13
+ puts "'#{s}' is probably '#{s.language}'"
14
+ end
15
+ # load the default profiles
16
+ Babel.load_profiles
17
+
18
+ # Let's see what Babel thinks about these texts
19
+ guess_language 'Montags ist es ruhig'
20
+ guess_language 'le coq est mort'
14
21
 
15
- puts
22
+ # Replace a profile with my own profile
23
+ Babel.load_profile('eng', '/path/to/my/english/profile.yml')
24
+
25
+ # Merge profile data
26
+ Babel.load_profile('eng', '/path/to/my/other/english/profile.yml', :merge => true)
27
+
28
+ # Show Top-3 Languages for a sentence
29
+ puts "What language could this be written in?".languages[0..2]
30
+
31
+ ##profiles
32
+ Profiles are collections of n-grams and the number of occurrences of each n-gram.
33
+ Babel uses n-grams with length 2-5 (bigram, trigram, tetragram, pentagram).
34
+ You can create your own profile and decide what n-grams to use and whether
35
+ you want to limit the size of the profile.
36
+
37
+ These profiles are shipped with the gem:
38
+ * german (deu) (this profile is built from udhr_deu_1996.txt)
39
+ * english (eng)
40
+ * french (fra)
41
+ * spanish (spa)
42
+ * italian (ita)
43
+
44
+ Want another profile built in? Send an email to info@simplificator.com and if there are enough
45
+ requests we add the profile.
46
+
47
+ The profiles that are shipped with babel are based on the texts found at
48
+ http://www.unicode.org/udhr/index_by_code.html
49
+
50
+ ##generating profiles
51
+ Profiles can be generated with the data found in http://www.unicode.org/udhr/assemblies/udhr_txt.zip or with any other text.
52
+ Once a profile is generated, Babel can store it in YAML format and load it again from YAML.
53
+
54
+ there is a rake task which simplifies profile generation:
55
+ rake babel:build_profile lang=foo file=myfile.txt dir=destination-directory
56
+
57
+ the file which is generated from this command can be loaded by
58
+ Babel.load_profile 'foo', 'profile_foo.yml'
16
59
 
17
60
  ##Copyright
18
61
 
data/Rakefile CHANGED
@@ -10,6 +10,9 @@ begin
10
10
  gem.homepage = "http://github.com/simplificator/babel"
11
11
  gem.authors = ["simplificator"]
12
12
  gem.add_dependency('ya2yaml', '>= 0.2.6')
13
+ gem.files.exclude 'lib/data'
14
+ #gem.files.exclude 'lib/data/*.xml'
15
+ gem.files.include 'lib/data/*.zip'
13
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
14
17
  end
15
18
  rescue LoadError
@@ -53,4 +56,47 @@ Rake::RDocTask.new do |rdoc|
53
56
  rdoc.rdoc_files.include('README*')
54
57
  rdoc.rdoc_files.include('lib/**/*.rb')
55
58
  end
59
+ require 'rubygems'
60
+ require 'zip/zip'
61
+ require 'lib/babel'
62
+
63
+ namespace :babel do
64
+ task :unpack_data do
65
+ dir = File.join(File.dirname(__FILE__), 'lib', 'data')
66
+ file = File.join(dir, 'udhr_txt.zip')
67
+ Zip::ZipFile.open(file) do |zip|
68
+ zip.each do |entry|
69
+ destination = File.join(dir, entry.name)
70
+ FileUtils.mkdir_p(File.dirname(destination))
71
+ FileUtils.rm(destination) if File.exists?(destination)
72
+ zip.extract(entry, destination)
73
+ end
74
+ end
75
+ FileUtils.cp(File.join(dir, 'udhr_deu_1996.txt'), File.join(dir, 'udhr_deu.txt'))
76
+ end
77
+
78
+ task :build_profile do
79
+ if ENV['lang']
80
+ lang = ENV['lang']
81
+ file = ENV['file']
82
+ dir = ENV['dir'] || File.dirname(__FILE__)
83
+ skip = ENV['skip']
84
+ limit = ENV['limit']
85
+ unless file
86
+ skip ||= 5 # skip header in data files. english all the time
87
+ file = File.join(File.dirname(__FILE__), 'lib', 'data', "udhr_#{lang}.txt")
88
+ end
89
+ puts "Learning about #{lang} from #{file} and save it to #{dir}"
90
+ File.open(file, 'r') do |f|
91
+ f.each_with_index do |line, index|
92
+ if index > skip
93
+ Babel.learn(lang, line)
94
+ end
95
+ end
96
+ end
97
+ Babel.save_profile(lang, :dir => dir, :limit => limit)
98
+ end
99
+ end
100
+
101
+ end
56
102
 
data/VERSION.yml CHANGED
@@ -1,4 +1,4 @@
1
1
  ---
2
- :patch: 4
2
+ :patch: 0
3
3
  :major: 0
4
- :minor: 0
4
+ :minor: 1
data/babel.gemspec ADDED
@@ -0,0 +1,64 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{babel}
5
+ s.version = "0.1.0"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["simplificator"]
9
+ s.date = %q{2009-07-13}
10
+ s.email = %q{info@simplificator.com}
11
+ s.extra_rdoc_files = [
12
+ "LICENSE",
13
+ "README.markdown"
14
+ ]
15
+ s.files = [
16
+ ".document",
17
+ ".gitignore",
18
+ "LICENSE",
19
+ "README.markdown",
20
+ "Rakefile",
21
+ "VERSION.yml",
22
+ "babel.gemspec",
23
+ "lib/babel.rb",
24
+ "lib/babel/babel.rb",
25
+ "lib/babel/profile.rb",
26
+ "lib/babel/string_extensions.rb",
27
+ "lib/profiles/profile_deu.yml",
28
+ "lib/profiles/profile_eng.yml",
29
+ "lib/profiles/profile_fra.yml",
30
+ "lib/profiles/profile_ita.yml",
31
+ "lib/profiles/profile_spa.yml",
32
+ "lib/profiles/udhr_txt.zip",
33
+ "samples/guessing.rb",
34
+ "test/babel_test.rb",
35
+ "test/profile_test.rb",
36
+ "test/string_extensions_test.rb",
37
+ "test/test_helper.rb"
38
+ ]
39
+ s.has_rdoc = true
40
+ s.homepage = %q{http://github.com/simplificator/babel}
41
+ s.rdoc_options = ["--charset=UTF-8"]
42
+ s.require_paths = ["lib"]
43
+ s.rubygems_version = %q{1.3.2}
44
+ s.summary = %q{Utility to guess the language of a text}
45
+ s.test_files = [
46
+ "test/babel_test.rb",
47
+ "test/profile_test.rb",
48
+ "test/string_extensions_test.rb",
49
+ "test/test_helper.rb"
50
+ ]
51
+
52
+ if s.respond_to? :specification_version then
53
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
54
+ s.specification_version = 3
55
+
56
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
57
+ s.add_runtime_dependency(%q<ya2yaml>, [">= 0.2.6"])
58
+ else
59
+ s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
60
+ end
61
+ else
62
+ s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
63
+ end
64
+ end
data/lib/babel/babel.rb CHANGED
@@ -1,54 +1,115 @@
1
+ #
2
+ #
3
+ # Profile Generation:
4
+ # Whenever it's about generating a Profile (Babel.learn, Babel.distances and Babel.guess)
5
+ # you can pass
6
+ # * :min_length (2)
7
+ # * :max_length (5)
8
+ # * :pad (true)
9
+ # They are just forwarded to String.n_grams (default values in parentheses)
10
+ # It's highly recommended that you use the same settings for learning and guessing....
11
+
12
+
13
+
1
14
  module Babel
2
15
  @profiles = {}
3
16
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')
17
+
18
+ # Learn that a text is in a given language.
19
+ # Calls Profile.learn for the profile with the given language.
4
20
  def self.learn(lang, text, options = {})
5
21
  lang = lang.to_s
6
- profile = @profiles[lang] ||= Profile.new()
22
+ profile = @profiles[lang] ||= Profile.new(lang)
7
23
  profile.learn(text, options)
8
24
  end
9
25
 
10
-
26
+ # Clear all the profiles
11
27
  def self.clear_profiles
12
28
  @profiles = {}
13
29
  end
30
+ # find the profile for a language
31
+ def self.profile(lang)
32
+ @profiles[lang]
33
+ end
14
34
 
15
- def self.guess(source, options = {})
16
- found = nil
17
- Babel.distances(source).each do |entry|
18
- found = entry if found.nil? || entry.last < found.last
35
+ # register a profile
36
+ # pass :merge => true to merge into an existing profile
37
+ def self.register_profile(profile, options = {})
38
+ if options[:merge] && @profiles[profile.language]
39
+ @profiles[profile.language].merge(profile)
40
+ else
41
+ @profiles[profile.language] = profile
19
42
  end
20
- found.first if found
21
43
  end
22
44
 
23
- # An array of arrays of [language, distance] arrays
24
- def self.distances(text)
25
- source = Profile.new.learn(text)
26
- @profiles.map { |lang, target| [lang, source.distance(target)] }
45
+ # Guess the language of a text.
46
+ # As soon as there is at least one profile, this method always
47
+ # returns a value (perhaps the wrong one)...
48
+ # I.e. if only "eng" profile is registered, then this method will always return "eng"
49
+ # no matter what text you pass
50
+ #
51
+ def self.guess(source, options = {})
52
+ distances = Babel.distances(source, options)
53
+ distances.first.first if distances.first
27
54
  end
28
-
29
- private
30
55
 
31
- def self.file_name(dir, lang)
32
- File.join(dir, "profile_#{lang}.yml")
56
+ # An array of arrays of [language, distance] arrays.
57
+ # The language with the shortest distance is the most probable solution.
58
+ # Sorted by distance, ascending (first item is most probable)
59
+ def self.distances(text, options = {})
60
+ source = Profile.new.learn(text, options)
61
+ @profiles.map { |lang, target| [lang, source.distance(target)] }.sort {|o1, o2| o1.last <=> o2.last}
33
62
  end
34
63
 
35
- # Load a specific profile ()
64
+
65
+ # Load all the profiles from a given directory.
66
+ # Loads all .yml files so be careful what directory you specify.
67
+ # options are:
68
+ # * :dir the directory, defaults to Babel::PROFILE_DIR
69
+ # See Babel.load_profile() for other options
36
70
  def self.load_profiles(options = {})
37
71
  dir = options[:directory] || PROFILE_DIR
38
72
  Dir[File.join(PROFILE_DIR, '*.yml')].each do |file|
39
- file =~ /profile_(.+)\.yml/
40
- @profiles[$1] = YAML.load_file(file)
73
+ Babel.load_profile(file, options)
41
74
  end
42
75
  end
43
76
 
77
+ # Load a single profile
78
+ # Options are:
79
+ # * :merge see Babel.register_profile for details
80
+ def self.load_profile(file, options = {})
81
+ Babel.register_profile(YAML.load_file(file), options)
82
+ end
83
+
84
+ # Save the profiles to a specified directory.
85
+ # See Babel.save_profile() for options
44
86
  def self.save_profiles(options = {})
45
- dir = options[:directory] || PROFILE_DIR
46
87
  @profiles.each do |lang, profile|
47
- profile.limit(options[:limit]) if options.has_key?(:limit)
48
- File.open(file_name(dir, lang), 'wb') do |file|
49
- file.write(profile.ya2yaml)
50
- end
88
+ Babel.save_profile(lang, options)
89
+ end
90
+ end
91
+
92
+ # Save a specific profile
93
+ # Options are:
94
+ # * :dir -> the directory to save the files to. Defaults to Babel::PROFILE_DIR
95
+ # * :limit -> Call limit() on the profile before save. This reduces the size of the profile
96
+ # for the cost of (possibly) less accurate language guessing
97
+ def self.save_profile(lang, options = {})
98
+ dir = options[:dir] || PROFILE_DIR
99
+ profile = Babel.profile(lang)
100
+ profile.limit(options[:limit]) if options[:limit]
101
+ File.open(file_name(dir, lang), 'wb') do |file|
102
+ file.write(profile.ya2yaml)
51
103
  end
104
+ end
105
+
106
+ private
107
+
108
+ # Build the file name for a profile file
109
+ # Naming scheme: profile_<LANG>.yml
110
+ def self.file_name(dir, lang)
111
+ File.join(dir, "profile_#{lang}.yml")
52
112
  end
113
+
53
114
  end
54
115
 
data/lib/babel/profile.rb CHANGED
@@ -1,43 +1,61 @@
1
1
  module Babel
2
2
  class Profile
3
- def initialize()
4
- @profile = {}
3
+ attr_reader :language
4
+ attr_reader :data
5
+ def initialize(language = nil)
6
+ @data = {}
5
7
  @total_occurences = 0
8
+ @language = language
6
9
  end
7
10
 
11
+
12
+ # learn a text
13
+ # following options are used when generating the n-grams:
14
+ # * min_length => 2
15
+ # * max_length => 5
16
+ # * pad => true
8
17
  def learn(text, options = {})
9
18
  options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
10
19
  text = clean(text)
11
20
  text.split(' ').each do |word|
12
- ngrams = word.ngrams(options)
13
- ngrams.each do |ngram|
21
+ word.n_grams(options).each do |ngram|
14
22
  self.occured(ngram)
15
23
  end
16
24
  end
25
+ # after learning rank the new n-grams
17
26
  self.rank
18
27
  self # return self so we can chain learn commans. profile.learn('asasas').learn('asdsad')
19
28
  end
20
29
 
21
30
 
31
+ def merge(other)
32
+ if self.language != other.language
33
+ raise ArgumentError.new("self has a language of #{self.language} but profile to merge has #{other.language}")
34
+ end
35
+ other.data.each do |key, value|
36
+ self.occured(key, value.first)
37
+ end
38
+ end
39
+
22
40
  # TODO: needed?
23
41
  def clean(text)
24
42
  return text
25
- text = text.gsub('?', '')
26
- text = text.gsub('.', '')
27
- text = text.gsub(';', '')
43
+ text = text.gsub(/[0-9]/, '')
28
44
  text = text.gsub(':', '')
45
+ text = text.gsub('/', '')
46
+ text = text.gsub('_', '')
29
47
  text = text.gsub('(', '')
30
48
  text = text.gsub(')', '')
31
- text = text.gsub('/', '')
32
- text = text.gsub(/[0-9]*/, '')
33
- text = text.gsub('+', '')
34
- text
49
+ text = text.gsub(';', '')
50
+ text = text.gsub('?', '')
51
+
52
+ return text
35
53
  end
54
+
36
55
  # limit this profile to n items
37
56
  # profile needs to be ranked first
38
- # do not use this if you plan to extend the profile later on
39
57
  def limit(boundary = 100)
40
- @profile.reject! do |key, value|
58
+ @data.reject! do |key, value|
41
59
  raise 'Please call rank() first' if value.last == 0
42
60
  boundary < value.last
43
61
  end
@@ -46,37 +64,37 @@ module Babel
46
64
  # rank the current profile
47
65
  # ngrams are sorted by occurence and then ranked
48
66
  def rank
49
- @profile.values.sort do |o1, o2|
50
- o2.first <=> o1.first
51
- end.each_with_index do |item, index|
52
- item[1] = index + 1
53
- end
67
+ #@data.values.sort do |o1, o2|
68
+ # o2.first <=> o1.first
69
+ #end.each_with_index do |item, index|
70
+ # item[1] = index + 1
71
+ #end
54
72
 
55
- @profile.values.each do |value|
73
+ @data.values.each do |value|
56
74
  value[1] = value[0] / @total_occurences.to_f
57
75
  end
58
76
  end
59
77
 
60
- # Called when a ngram is occured, optional you can pass an
78
+ # Called when an n-gram has occurred; optionally you can pass an
61
79
  # amount (how many times the ngram occured)
62
80
  def occured(ngram, amount = 1)
63
- (@profile[ngram] ||= [0, 0])[0] += amount
81
+ (@data[ngram] ||= [0, 0])[0] += amount
64
82
  @total_occurences += amount
65
83
  end
66
84
 
67
85
  # find the occurence of a ngram. if it never occured, returns 0
68
86
  def occurence(ngram)
69
- @profile[ngram] ? @profile[ngram].first : 0
87
+ @data[ngram] ? @data[ngram].first : 0
70
88
  end
71
89
 
72
90
  # find the ranking of a ngram. if it is not yet ranked, return 0
73
91
  def ranking(ngram)
74
- @profile[ngram] ? @profile[ngram].last : 0
92
+ @data[ngram] ? @data[ngram].last : 0
75
93
  end
76
94
 
77
95
  # Calculate the distance to another profile
78
96
  def distance(other)
79
- @profile.inject(0) do |memo, item|
97
+ @data.inject(0) do |memo, item|
80
98
  other_ranking = other.ranking(item.first)
81
99
  if other_ranking == 0
82
100
  memo += 1
@@ -88,7 +106,7 @@ module Babel
88
106
 
89
107
 
90
108
  def to_s
91
- @profile.inspect
109
+ @data.inspect
92
110
  end
93
111
  end
94
112
  end
@@ -1,15 +1,19 @@
1
1
  class String
2
-
3
- # TODO: recursive?
4
- def ngrams(options = {})
2
+ # Generate n-grams for a string.
3
+ # options are:
4
+ # :min_length : minimum length of the n-grams (defaults to 1)
5
+ # :max_length : maximum length of the n-grams (defaults to self.length)
6
+ # :pad : pad with '_' to generate all possible n-grams (defaults to false)
7
+ def n_grams(options = {})
8
+ # TODO: recursive?
9
+ # TODO: use min/max length for loop index instead of looping
10
+ # all and then use if test to decide if to add or not
5
11
  min_length = options[:min_length] || 1
6
12
  max_length = options[:max_length] || self.length
7
13
  pad = options[:pad] || false
8
14
  value = options[:preserve_case] ? self : self.downcase
9
15
  value = "_#{value}#{'_' * (value.length - 1)}" if pad
10
16
  res = []
11
- # TODO: use min/max length for loop index instead of looping
12
- # all and then use if test to decide if to add or not
13
17
  0.upto(value.length - 1) do |index|
14
18
  index.upto(value.length - 1) do |len|
15
19
  if value[index..len].length >= min_length && value[index..len].length <= max_length
@@ -20,22 +24,23 @@ class String
20
24
  res
21
25
  end
22
26
 
23
- # def byte_grams(options = {})
24
- # min_length = options[:min_length] || 1
25
- # max_length = options[:max_length] || self.length
26
- # value = options[:preserve_case] ? self : self.downcase
27
- # res = []
28
- #
29
- # end
30
-
31
-
32
- # Ask Babel about the language of this text
33
- # Can return nil if no language found
27
+ # Ask Babel about the language of this text.
28
+ # Convenience method, just calls Babel.guess().
29
+ # See Babel.guess for description.
34
30
  def language(options = {})
35
31
  Babel.guess(self, options)
36
32
  end
33
+ # Ask Babel about the languages this text could be.
34
+ # It will return all the registered languages with the most probable
35
+ # Language first. You might want to restrict this before presenting to
36
+ # the user.
37
+ def languages(options = {})
38
+ Babel.distances(self, options).map() {|item| item.first}
39
+ end
37
40
 
38
- # Tell Babel that this text is in a given language
41
+ # Tell Babel that this text is in a given language.
42
+ # Convenience method, just calls Babel.learn().
43
+ # See Babel.learn for description
39
44
  def language=(lang, options = {})
40
45
  Babel.learn(lang, self, options)
41
46
  end
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  (i:
4
4
  - 1
5
5
  - 1.48557506610809e-05
@@ -25359,4 +25359,5 @@
25359
25359
  ützu:
25360
25360
  - 1
25361
25361
  - 1.48557506610809e-05
25362
+ language: deu
25362
25363
  total_occurences: 67314
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  ? ",_"
4
4
  :
5
5
  - 94
@@ -20791,4 +20791,5 @@
20791
20791
  ‐se:
20792
20792
  - 1
20793
20793
  - 1.85742412422453e-05
20794
+ language: eng
20794
20795
  total_occurences: 53838
@@ -1,5 +1,5 @@
1
1
  --- !ruby/object:Babel::Profile
2
- profile:
2
+ data:
3
3
  ? ",_"
4
4
  :
5
5
  - 118
@@ -24961,4 +24961,5 @@
24961
24961
  ’é:
24962
24962
  - 8
24963
24963
  - 0.000123525415354209
24964
+ language: fra
24964
24965
  total_occurences: 64764