simplificator-babel 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (375) hide show
  1. data/LICENSE +20 -0
  2. data/README.markdown +19 -0
  3. data/Rakefile +56 -0
  4. data/VERSION.yml +4 -0
  5. data/lib/babel/babel.rb +54 -0
  6. data/lib/babel/profile.rb +94 -0
  7. data/lib/babel/string_extensions.rb +42 -0
  8. data/lib/babel.rb +10 -0
  9. data/lib/data/udhr_txt/index.xml +385 -0
  10. data/lib/data/udhr_txt/udhr_007.txt +220 -0
  11. data/lib/data/udhr_txt/udhr_008.txt +220 -0
  12. data/lib/data/udhr_txt/udhr_009.txt +228 -0
  13. data/lib/data/udhr_txt/udhr_010.txt +219 -0
  14. data/lib/data/udhr_txt/udhr_011.txt +232 -0
  15. data/lib/data/udhr_txt/udhr_abk.txt +218 -0
  16. data/lib/data/udhr_txt/udhr_ace.txt +221 -0
  17. data/lib/data/udhr_txt/udhr_acu.txt +222 -0
  18. data/lib/data/udhr_txt/udhr_ada.txt +220 -0
  19. data/lib/data/udhr_txt/udhr_afr.txt +219 -0
  20. data/lib/data/udhr_txt/udhr_agr.txt +219 -0
  21. data/lib/data/udhr_txt/udhr_aii.txt +216 -0
  22. data/lib/data/udhr_txt/udhr_ajg.txt +219 -0
  23. data/lib/data/udhr_txt/udhr_aka_akuapem.txt +221 -0
  24. data/lib/data/udhr_txt/udhr_aka_asante.txt +220 -0
  25. data/lib/data/udhr_txt/udhr_aka_fante.txt +219 -0
  26. data/lib/data/udhr_txt/udhr_als.txt +220 -0
  27. data/lib/data/udhr_txt/udhr_amc.txt +215 -0
  28. data/lib/data/udhr_txt/udhr_ame.txt +222 -0
  29. data/lib/data/udhr_txt/udhr_amh.txt +209 -0
  30. data/lib/data/udhr_txt/udhr_amr.txt +221 -0
  31. data/lib/data/udhr_txt/udhr_arb.txt +220 -0
  32. data/lib/data/udhr_txt/udhr_arl.txt +222 -0
  33. data/lib/data/udhr_txt/udhr_arn.txt +218 -0
  34. data/lib/data/udhr_txt/udhr_ast.txt +221 -0
  35. data/lib/data/udhr_txt/udhr_auv.txt +217 -0
  36. data/lib/data/udhr_txt/udhr_ayr.txt +218 -0
  37. data/lib/data/udhr_txt/udhr_azj_cyrl.txt +218 -0
  38. data/lib/data/udhr_txt/udhr_azj_latn.txt +218 -0
  39. data/lib/data/udhr_txt/udhr_bam.txt +218 -0
  40. data/lib/data/udhr_txt/udhr_ban.txt +222 -0
  41. data/lib/data/udhr_txt/udhr_bba.txt +218 -0
  42. data/lib/data/udhr_txt/udhr_bci.txt +217 -0
  43. data/lib/data/udhr_txt/udhr_bcl.txt +219 -0
  44. data/lib/data/udhr_txt/udhr_bel.txt +221 -0
  45. data/lib/data/udhr_txt/udhr_bem.txt +217 -0
  46. data/lib/data/udhr_txt/udhr_ben.txt +222 -0
  47. data/lib/data/udhr_txt/udhr_bho.txt +219 -0
  48. data/lib/data/udhr_txt/udhr_bin.txt +232 -0
  49. data/lib/data/udhr_txt/udhr_bis.txt +218 -0
  50. data/lib/data/udhr_txt/udhr_blu.txt +219 -0
  51. data/lib/data/udhr_txt/udhr_boa.txt +223 -0
  52. data/lib/data/udhr_txt/udhr_bod.txt +221 -0
  53. data/lib/data/udhr_txt/udhr_bos_cyrl.txt +220 -0
  54. data/lib/data/udhr_txt/udhr_bos_latn.txt +220 -0
  55. data/lib/data/udhr_txt/udhr_bre.txt +222 -0
  56. data/lib/data/udhr_txt/udhr_btb.txt +217 -0
  57. data/lib/data/udhr_txt/udhr_bug.txt +222 -0
  58. data/lib/data/udhr_txt/udhr_bul.txt +218 -0
  59. data/lib/data/udhr_txt/udhr_cab.txt +221 -0
  60. data/lib/data/udhr_txt/udhr_cak.txt +217 -0
  61. data/lib/data/udhr_txt/udhr_cat.txt +220 -0
  62. data/lib/data/udhr_txt/udhr_cbr.txt +219 -0
  63. data/lib/data/udhr_txt/udhr_cbs.txt +153 -0
  64. data/lib/data/udhr_txt/udhr_cbt.txt +220 -0
  65. data/lib/data/udhr_txt/udhr_cbu.txt +218 -0
  66. data/lib/data/udhr_txt/udhr_ccx.txt +222 -0
  67. data/lib/data/udhr_txt/udhr_ceb.txt +218 -0
  68. data/lib/data/udhr_txt/udhr_ces.txt +221 -0
  69. data/lib/data/udhr_txt/udhr_cha.txt +219 -0
  70. data/lib/data/udhr_txt/udhr_chj.txt +220 -0
  71. data/lib/data/udhr_txt/udhr_chk.txt +220 -0
  72. data/lib/data/udhr_txt/udhr_chr.txt +10 -0
  73. data/lib/data/udhr_txt/udhr_cic.txt +220 -0
  74. data/lib/data/udhr_txt/udhr_cjk.txt +218 -0
  75. data/lib/data/udhr_txt/udhr_cjk_AO.txt +220 -0
  76. data/lib/data/udhr_txt/udhr_ckb.txt +217 -0
  77. data/lib/data/udhr_txt/udhr_cmn_hans.txt +220 -0
  78. data/lib/data/udhr_txt/udhr_cmn_hant.txt +220 -0
  79. data/lib/data/udhr_txt/udhr_cnh.txt +220 -0
  80. data/lib/data/udhr_txt/udhr_cni.txt +220 -0
  81. data/lib/data/udhr_txt/udhr_cos.txt +218 -0
  82. data/lib/data/udhr_txt/udhr_cot.txt +222 -0
  83. data/lib/data/udhr_txt/udhr_cpu.txt +219 -0
  84. data/lib/data/udhr_txt/udhr_crs.txt +217 -0
  85. data/lib/data/udhr_txt/udhr_csa.txt +223 -0
  86. data/lib/data/udhr_txt/udhr_csw.txt +163 -0
  87. data/lib/data/udhr_txt/udhr_ctd.txt +222 -0
  88. data/lib/data/udhr_txt/udhr_cym.txt +222 -0
  89. data/lib/data/udhr_txt/udhr_dag.txt +217 -0
  90. data/lib/data/udhr_txt/udhr_dan.txt +224 -0
  91. data/lib/data/udhr_txt/udhr_ddn.txt +217 -0
  92. data/lib/data/udhr_txt/udhr_deu_1901.txt +220 -0
  93. data/lib/data/udhr_txt/udhr_deu_1996.txt +220 -0
  94. data/lib/data/udhr_txt/udhr_dga.txt +220 -0
  95. data/lib/data/udhr_txt/udhr_dip.txt +217 -0
  96. data/lib/data/udhr_txt/udhr_div.txt +220 -0
  97. data/lib/data/udhr_txt/udhr_dyo.txt +217 -0
  98. data/lib/data/udhr_txt/udhr_dzo.txt +9 -0
  99. data/lib/data/udhr_txt/udhr_ell_monotonic.txt +220 -0
  100. data/lib/data/udhr_txt/udhr_ell_polytonic.txt +220 -0
  101. data/lib/data/udhr_txt/udhr_emk.txt +218 -0
  102. data/lib/data/udhr_txt/udhr_eml.txt +219 -0
  103. data/lib/data/udhr_txt/udhr_eng.txt +219 -0
  104. data/lib/data/udhr_txt/udhr_epo.txt +221 -0
  105. data/lib/data/udhr_txt/udhr_est.txt +219 -0
  106. data/lib/data/udhr_txt/udhr_eus.txt +220 -0
  107. data/lib/data/udhr_txt/udhr_eve.txt +207 -0
  108. data/lib/data/udhr_txt/udhr_ewe.txt +218 -0
  109. data/lib/data/udhr_txt/udhr_fao.txt +219 -0
  110. data/lib/data/udhr_txt/udhr_fij.txt +224 -0
  111. data/lib/data/udhr_txt/udhr_fin.txt +224 -0
  112. data/lib/data/udhr_txt/udhr_flm.txt +219 -0
  113. data/lib/data/udhr_txt/udhr_fon.txt +217 -0
  114. data/lib/data/udhr_txt/udhr_fra.txt +218 -0
  115. data/lib/data/udhr_txt/udhr_fri.txt +219 -0
  116. data/lib/data/udhr_txt/udhr_fuc.txt +217 -0
  117. data/lib/data/udhr_txt/udhr_fur.txt +220 -0
  118. data/lib/data/udhr_txt/udhr_gaa.txt +220 -0
  119. data/lib/data/udhr_txt/udhr_gag.txt +223 -0
  120. data/lib/data/udhr_txt/udhr_gax.txt +222 -0
  121. data/lib/data/udhr_txt/udhr_gjn.txt +220 -0
  122. data/lib/data/udhr_txt/udhr_gkp.txt +216 -0
  123. data/lib/data/udhr_txt/udhr_gla.txt +229 -0
  124. data/lib/data/udhr_txt/udhr_gle.txt +215 -0
  125. data/lib/data/udhr_txt/udhr_glg.txt +217 -0
  126. data/lib/data/udhr_txt/udhr_guc.txt +221 -0
  127. data/lib/data/udhr_txt/udhr_gug.txt +210 -0
  128. data/lib/data/udhr_txt/udhr_guj.txt +219 -0
  129. data/lib/data/udhr_txt/udhr_gyr.txt +203 -0
  130. data/lib/data/udhr_txt/udhr_hat_kreyol.txt +221 -0
  131. data/lib/data/udhr_txt/udhr_hat_popular.txt +221 -0
  132. data/lib/data/udhr_txt/udhr_hau_NE.txt +219 -0
  133. data/lib/data/udhr_txt/udhr_hau_NG.txt +219 -0
  134. data/lib/data/udhr_txt/udhr_haw.txt +219 -0
  135. data/lib/data/udhr_txt/udhr_hea.txt +219 -0
  136. data/lib/data/udhr_txt/udhr_heb.txt +216 -0
  137. data/lib/data/udhr_txt/udhr_hil.txt +217 -0
  138. data/lib/data/udhr_txt/udhr_hin.txt +222 -0
  139. data/lib/data/udhr_txt/udhr_hms.txt +219 -0
  140. data/lib/data/udhr_txt/udhr_hna.txt +217 -0
  141. data/lib/data/udhr_txt/udhr_hni.txt +218 -0
  142. data/lib/data/udhr_txt/udhr_hrv.txt +218 -0
  143. data/lib/data/udhr_txt/udhr_hsb.txt +220 -0
  144. data/lib/data/udhr_txt/udhr_hun.txt +218 -0
  145. data/lib/data/udhr_txt/udhr_hus.txt +222 -0
  146. data/lib/data/udhr_txt/udhr_huu.txt +220 -0
  147. data/lib/data/udhr_txt/udhr_hva.txt +220 -0
  148. data/lib/data/udhr_txt/udhr_hye.txt +234 -0
  149. data/lib/data/udhr_txt/udhr_ibb.txt +235 -0
  150. data/lib/data/udhr_txt/udhr_ibo.txt +219 -0
  151. data/lib/data/udhr_txt/udhr_ido.txt +224 -0
  152. data/lib/data/udhr_txt/udhr_iii.txt +9 -0
  153. data/lib/data/udhr_txt/udhr_ike.txt +163 -0
  154. data/lib/data/udhr_txt/udhr_ilo.txt +217 -0
  155. data/lib/data/udhr_txt/udhr_ina.txt +220 -0
  156. data/lib/data/udhr_txt/udhr_ind.txt +219 -0
  157. data/lib/data/udhr_txt/udhr_isl.txt +217 -0
  158. data/lib/data/udhr_txt/udhr_ita.txt +221 -0
  159. data/lib/data/udhr_txt/udhr_jav.txt +222 -0
  160. data/lib/data/udhr_txt/udhr_jpn.txt +219 -0
  161. data/lib/data/udhr_txt/udhr_kal.txt +218 -0
  162. data/lib/data/udhr_txt/udhr_kan.txt +216 -0
  163. data/lib/data/udhr_txt/udhr_kat.txt +221 -0
  164. data/lib/data/udhr_txt/udhr_kaz.txt +218 -0
  165. data/lib/data/udhr_txt/udhr_kbp.txt +218 -0
  166. data/lib/data/udhr_txt/udhr_kde.txt +212 -0
  167. data/lib/data/udhr_txt/udhr_kea.txt +219 -0
  168. data/lib/data/udhr_txt/udhr_kek.txt +219 -0
  169. data/lib/data/udhr_txt/udhr_khk.txt +217 -0
  170. data/lib/data/udhr_txt/udhr_khk_mong.txt +11 -0
  171. data/lib/data/udhr_txt/udhr_khm.txt +220 -0
  172. data/lib/data/udhr_txt/udhr_kin.txt +220 -0
  173. data/lib/data/udhr_txt/udhr_kir.txt +220 -0
  174. data/lib/data/udhr_txt/udhr_kmb.txt +219 -0
  175. data/lib/data/udhr_txt/udhr_knc.txt +230 -0
  176. data/lib/data/udhr_txt/udhr_kng.txt +219 -0
  177. data/lib/data/udhr_txt/udhr_kng_AO.txt +219 -0
  178. data/lib/data/udhr_txt/udhr_koo.txt +216 -0
  179. data/lib/data/udhr_txt/udhr_kor.txt +219 -0
  180. data/lib/data/udhr_txt/udhr_kqn.txt +218 -0
  181. data/lib/data/udhr_txt/udhr_kri.txt +226 -0
  182. data/lib/data/udhr_txt/udhr_ktu.txt +219 -0
  183. data/lib/data/udhr_txt/udhr_lao.txt +223 -0
  184. data/lib/data/udhr_txt/udhr_lat.txt +221 -0
  185. data/lib/data/udhr_txt/udhr_lat_1.txt +220 -0
  186. data/lib/data/udhr_txt/udhr_lav.txt +220 -0
  187. data/lib/data/udhr_txt/udhr_lia.txt +218 -0
  188. data/lib/data/udhr_txt/udhr_lin.txt +217 -0
  189. data/lib/data/udhr_txt/udhr_lin_tones.txt +214 -0
  190. data/lib/data/udhr_txt/udhr_lit.txt +218 -0
  191. data/lib/data/udhr_txt/udhr_lnc.txt +219 -0
  192. data/lib/data/udhr_txt/udhr_lns.txt +219 -0
  193. data/lib/data/udhr_txt/udhr_loz.txt +219 -0
  194. data/lib/data/udhr_txt/udhr_ltz.txt +218 -0
  195. data/lib/data/udhr_txt/udhr_lua.txt +219 -0
  196. data/lib/data/udhr_txt/udhr_lue.txt +217 -0
  197. data/lib/data/udhr_txt/udhr_lug.txt +216 -0
  198. data/lib/data/udhr_txt/udhr_lun.txt +216 -0
  199. data/lib/data/udhr_txt/udhr_mad.txt +223 -0
  200. data/lib/data/udhr_txt/udhr_mag.txt +220 -0
  201. data/lib/data/udhr_txt/udhr_mah.txt +220 -0
  202. data/lib/data/udhr_txt/udhr_mai.txt +223 -0
  203. data/lib/data/udhr_txt/udhr_mal.txt +210 -0
  204. data/lib/data/udhr_txt/udhr_mam.txt +218 -0
  205. data/lib/data/udhr_txt/udhr_mar.txt +219 -0
  206. data/lib/data/udhr_txt/udhr_maz.txt +218 -0
  207. data/lib/data/udhr_txt/udhr_mcd.txt +220 -0
  208. data/lib/data/udhr_txt/udhr_mcf.txt +223 -0
  209. data/lib/data/udhr_txt/udhr_men.txt +222 -0
  210. data/lib/data/udhr_txt/udhr_mic.txt +218 -0
  211. data/lib/data/udhr_txt/udhr_min.txt +221 -0
  212. data/lib/data/udhr_txt/udhr_miq.txt +213 -0
  213. data/lib/data/udhr_txt/udhr_mkd.txt +221 -0
  214. data/lib/data/udhr_txt/udhr_mlt.txt +217 -0
  215. data/lib/data/udhr_txt/udhr_mly_arab.txt +219 -0
  216. data/lib/data/udhr_txt/udhr_mly_latn.txt +218 -0
  217. data/lib/data/udhr_txt/udhr_mos.txt +216 -0
  218. data/lib/data/udhr_txt/udhr_mri.txt +219 -0
  219. data/lib/data/udhr_txt/udhr_mxi.txt +218 -0
  220. data/lib/data/udhr_txt/udhr_mxv.txt +223 -0
  221. data/lib/data/udhr_txt/udhr_mya.txt +219 -0
  222. data/lib/data/udhr_txt/udhr_mzi.txt +227 -0
  223. data/lib/data/udhr_txt/udhr_nav.txt +219 -0
  224. data/lib/data/udhr_txt/udhr_nba.txt +257 -0
  225. data/lib/data/udhr_txt/udhr_nbl.txt +218 -0
  226. data/lib/data/udhr_txt/udhr_ndo.txt +217 -0
  227. data/lib/data/udhr_txt/udhr_nep.txt +214 -0
  228. data/lib/data/udhr_txt/udhr_nhn.txt +221 -0
  229. data/lib/data/udhr_txt/udhr_nld.txt +217 -0
  230. data/lib/data/udhr_txt/udhr_nno.txt +219 -0
  231. data/lib/data/udhr_txt/udhr_nob.txt +225 -0
  232. data/lib/data/udhr_txt/udhr_not.txt +218 -0
  233. data/lib/data/udhr_txt/udhr_nso.txt +219 -0
  234. data/lib/data/udhr_txt/udhr_nya_chechewa.txt +221 -0
  235. data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +218 -0
  236. data/lib/data/udhr_txt/udhr_nym.txt +229 -0
  237. data/lib/data/udhr_txt/udhr_nyn.txt +213 -0
  238. data/lib/data/udhr_txt/udhr_nzi.txt +221 -0
  239. data/lib/data/udhr_txt/udhr_ojb.txt +221 -0
  240. data/lib/data/udhr_txt/udhr_oss.txt +214 -0
  241. data/lib/data/udhr_txt/udhr_ote.txt +218 -0
  242. data/lib/data/udhr_txt/udhr_pam.txt +225 -0
  243. data/lib/data/udhr_txt/udhr_pan.txt +227 -0
  244. data/lib/data/udhr_txt/udhr_pau.txt +219 -0
  245. data/lib/data/udhr_txt/udhr_pbb.txt +218 -0
  246. data/lib/data/udhr_txt/udhr_pbu.txt +9 -0
  247. data/lib/data/udhr_txt/udhr_pcd.txt +218 -0
  248. data/lib/data/udhr_txt/udhr_pcm.txt +218 -0
  249. data/lib/data/udhr_txt/udhr_pes_1.txt +218 -0
  250. data/lib/data/udhr_txt/udhr_pes_2.txt +222 -0
  251. data/lib/data/udhr_txt/udhr_pis.txt +219 -0
  252. data/lib/data/udhr_txt/udhr_plt.txt +214 -0
  253. data/lib/data/udhr_txt/udhr_pnb.txt +223 -0
  254. data/lib/data/udhr_txt/udhr_pol.txt +220 -0
  255. data/lib/data/udhr_txt/udhr_pon.txt +218 -0
  256. data/lib/data/udhr_txt/udhr_por_BR.txt +231 -0
  257. data/lib/data/udhr_txt/udhr_por_PT.txt +219 -0
  258. data/lib/data/udhr_txt/udhr_pov.txt +220 -0
  259. data/lib/data/udhr_txt/udhr_ppl.txt +219 -0
  260. data/lib/data/udhr_txt/udhr_prq.txt +151 -0
  261. data/lib/data/udhr_txt/udhr_prv.txt +207 -0
  262. data/lib/data/udhr_txt/udhr_quc.txt +217 -0
  263. data/lib/data/udhr_txt/udhr_qud.txt +218 -0
  264. data/lib/data/udhr_txt/udhr_quy.txt +221 -0
  265. data/lib/data/udhr_txt/udhr_quz.txt +223 -0
  266. data/lib/data/udhr_txt/udhr_qva.txt +219 -0
  267. data/lib/data/udhr_txt/udhr_qvc.txt +218 -0
  268. data/lib/data/udhr_txt/udhr_qvh.txt +217 -0
  269. data/lib/data/udhr_txt/udhr_qvm.txt +219 -0
  270. data/lib/data/udhr_txt/udhr_qvn.txt +217 -0
  271. data/lib/data/udhr_txt/udhr_qwh.txt +218 -0
  272. data/lib/data/udhr_txt/udhr_qxa.txt +217 -0
  273. data/lib/data/udhr_txt/udhr_qxn.txt +216 -0
  274. data/lib/data/udhr_txt/udhr_qxu.txt +221 -0
  275. data/lib/data/udhr_txt/udhr_rar.txt +220 -0
  276. data/lib/data/udhr_txt/udhr_rmn.txt +220 -0
  277. data/lib/data/udhr_txt/udhr_rmn_1.txt +221 -0
  278. data/lib/data/udhr_txt/udhr_rmy.txt +218 -0
  279. data/lib/data/udhr_txt/udhr_roh.txt +217 -0
  280. data/lib/data/udhr_txt/udhr_ron_1953.txt +218 -0
  281. data/lib/data/udhr_txt/udhr_ron_1993.txt +218 -0
  282. data/lib/data/udhr_txt/udhr_ron_2006.txt +218 -0
  283. data/lib/data/udhr_txt/udhr_run.txt +218 -0
  284. data/lib/data/udhr_txt/udhr_rus.txt +220 -0
  285. data/lib/data/udhr_txt/udhr_sag.txt +220 -0
  286. data/lib/data/udhr_txt/udhr_san.txt +219 -0
  287. data/lib/data/udhr_txt/udhr_sco.txt +222 -0
  288. data/lib/data/udhr_txt/udhr_shp.txt +224 -0
  289. data/lib/data/udhr_txt/udhr_skr.txt +225 -0
  290. data/lib/data/udhr_txt/udhr_slk.txt +219 -0
  291. data/lib/data/udhr_txt/udhr_slv.txt +218 -0
  292. data/lib/data/udhr_txt/udhr_sme.txt +220 -0
  293. data/lib/data/udhr_txt/udhr_smo.txt +226 -0
  294. data/lib/data/udhr_txt/udhr_sna.txt +223 -0
  295. data/lib/data/udhr_txt/udhr_snk.txt +220 -0
  296. data/lib/data/udhr_txt/udhr_som.txt +216 -0
  297. data/lib/data/udhr_txt/udhr_sot.txt +220 -0
  298. data/lib/data/udhr_txt/udhr_spa.txt +220 -0
  299. data/lib/data/udhr_txt/udhr_src.txt +220 -0
  300. data/lib/data/udhr_txt/udhr_srp_cyrl.txt +218 -0
  301. data/lib/data/udhr_txt/udhr_srp_latn.txt +218 -0
  302. data/lib/data/udhr_txt/udhr_srr.txt +219 -0
  303. data/lib/data/udhr_txt/udhr_ssw.txt +228 -0
  304. data/lib/data/udhr_txt/udhr_suk.txt +218 -0
  305. data/lib/data/udhr_txt/udhr_sun.txt +227 -0
  306. data/lib/data/udhr_txt/udhr_sus.txt +218 -0
  307. data/lib/data/udhr_txt/udhr_swe.txt +224 -0
  308. data/lib/data/udhr_txt/udhr_swh.txt +221 -0
  309. data/lib/data/udhr_txt/udhr_tah.txt +217 -0
  310. data/lib/data/udhr_txt/udhr_taj.txt +10 -0
  311. data/lib/data/udhr_txt/udhr_tam.txt +227 -0
  312. data/lib/data/udhr_txt/udhr_tat.txt +219 -0
  313. data/lib/data/udhr_txt/udhr_tbz.txt +219 -0
  314. data/lib/data/udhr_txt/udhr_tca.txt +219 -0
  315. data/lib/data/udhr_txt/udhr_tem.txt +216 -0
  316. data/lib/data/udhr_txt/udhr_tet.txt +219 -0
  317. data/lib/data/udhr_txt/udhr_tgk.txt +217 -0
  318. data/lib/data/udhr_txt/udhr_tgl.txt +224 -0
  319. data/lib/data/udhr_txt/udhr_tgl_tglg.txt +9 -0
  320. data/lib/data/udhr_txt/udhr_tha.txt +217 -0
  321. data/lib/data/udhr_txt/udhr_tir.txt +217 -0
  322. data/lib/data/udhr_txt/udhr_tiv.txt +232 -0
  323. data/lib/data/udhr_txt/udhr_tob.txt +218 -0
  324. data/lib/data/udhr_txt/udhr_toi.txt +216 -0
  325. data/lib/data/udhr_txt/udhr_toj.txt +219 -0
  326. data/lib/data/udhr_txt/udhr_ton.txt +221 -0
  327. data/lib/data/udhr_txt/udhr_top.txt +220 -0
  328. data/lib/data/udhr_txt/udhr_tpi.txt +219 -0
  329. data/lib/data/udhr_txt/udhr_tsn.txt +219 -0
  330. data/lib/data/udhr_txt/udhr_tso_MZ.txt +220 -0
  331. data/lib/data/udhr_txt/udhr_tsz.txt +218 -0
  332. data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +216 -0
  333. data/lib/data/udhr_txt/udhr_tuk_latn.txt +221 -0
  334. data/lib/data/udhr_txt/udhr_tur.txt +219 -0
  335. data/lib/data/udhr_txt/udhr_tzc.txt +219 -0
  336. data/lib/data/udhr_txt/udhr_tzh.txt +218 -0
  337. data/lib/data/udhr_txt/udhr_tzm.txt +220 -0
  338. data/lib/data/udhr_txt/udhr_tzm_tfng.txt +9 -0
  339. data/lib/data/udhr_txt/udhr_uig_arab.txt +219 -0
  340. data/lib/data/udhr_txt/udhr_uig_latn.txt +219 -0
  341. data/lib/data/udhr_txt/udhr_ukr.txt +218 -0
  342. data/lib/data/udhr_txt/udhr_umb.txt +218 -0
  343. data/lib/data/udhr_txt/udhr_ura.txt +219 -0
  344. data/lib/data/udhr_txt/udhr_urd.txt +9 -0
  345. data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +220 -0
  346. data/lib/data/udhr_txt/udhr_uzn_latn.txt +220 -0
  347. data/lib/data/udhr_txt/udhr_vai.txt +224 -0
  348. data/lib/data/udhr_txt/udhr_vie.txt +221 -0
  349. data/lib/data/udhr_txt/udhr_vmw.txt +220 -0
  350. data/lib/data/udhr_txt/udhr_war.txt +219 -0
  351. data/lib/data/udhr_txt/udhr_wln.txt +220 -0
  352. data/lib/data/udhr_txt/udhr_wol.txt +219 -0
  353. data/lib/data/udhr_txt/udhr_wwa.txt +109 -0
  354. data/lib/data/udhr_txt/udhr_xho.txt +219 -0
  355. data/lib/data/udhr_txt/udhr_xsm.txt +219 -0
  356. data/lib/data/udhr_txt/udhr_yad.txt +220 -0
  357. data/lib/data/udhr_txt/udhr_yao.txt +214 -0
  358. data/lib/data/udhr_txt/udhr_yap.txt +220 -0
  359. data/lib/data/udhr_txt/udhr_ydd.txt +223 -0
  360. data/lib/data/udhr_txt/udhr_ykg.txt +211 -0
  361. data/lib/data/udhr_txt/udhr_yor.txt +218 -0
  362. data/lib/data/udhr_txt/udhr_yua.txt +218 -0
  363. data/lib/data/udhr_txt/udhr_zam.txt +223 -0
  364. data/lib/data/udhr_txt/udhr_ztu.txt +219 -0
  365. data/lib/data/udhr_txt/udhr_zul.txt +219 -0
  366. data/lib/profiles/profile_deu_1996.yml +25362 -0
  367. data/lib/profiles/profile_eng.yml +20794 -0
  368. data/lib/profiles/profile_fra.yml +24964 -0
  369. data/lib/profiles/profile_spa.yml +23020 -0
  370. data/test/babel_test.rb +44 -0
  371. data/test/profile_test.rb +105 -0
  372. data/test/string_extensions_test.rb +43 -0
  373. data/test/test_helper.rb +10 -0
  374. data/test/train.rb +26 -0
  375. metadata +440 -0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Simplificator GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,19 @@
1
+ #babel
2
+
3
+ Babel is a gem to identify in what language a text is written.
4
+ It is based on the n-gram approach by Cavnar and Trenkle as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
5
+
6
+
7
+ ##usage
8
+ require 'rubygems'
9
+ require 'simplificator-babel'
10
+
11
+ # Train babel: feed it some texts
12
+ 'An english text to train and learn'.language= 'en'
13
+ 'Ein deutscher Text'.language= 'de'
14
+
15
+ puts 'Noch ein deutscher Text'.language # prints the guessed language, e.g. 'de'
16
+
17
+ ##Copyright
18
+
19
+ Copyright (c) 2009 Simplificator GmbH. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
# Build, test and documentation tasks for the babel gem.
require 'rubygems'
require 'rake'
# The rdoc task below calls YAML.load; require it explicitly instead of
# relying on another library having loaded it.
require 'yaml'

# Gem packaging tasks (optional: only defined when jeweler is installed).
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "babel"
    gem.summary = %Q{Utility to guess the language of a text}
    gem.email = "info@simplificator.com"
    gem.homepage = "http://github.com/simplificator/babel"
    gem.authors = ["simplificator"]
    gem.add_dependency('ya2yaml', '>= 0.2.6')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# Unit tests: every test/**/*_test.rb file.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage (optional: falls back to a task that explains how to install rcov).
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

task :default => :test

# API documentation. Newer Rake versions no longer ship rake/rdoctask, so
# prefer the task class provided by rdoc and only fall back to the old one.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "babel #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 4
3
+ :major: 0
4
+ :minor: 0
@@ -0,0 +1,54 @@
require 'yaml'

# Babel guesses the language of a text by comparing a profile built from the
# text with the profile stored for every known language and picking the
# language whose profile is closest.
module Babel
  # Known profiles, keyed by language name (always a String, see .learn).
  @profiles = {}

  # Default directory for the profile_<lang>.yml files.
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')

  # Teach Babel that +text+ is written in +lang+.
  #
  # lang::    language identifier; converted with to_s before it is used as key
  # text::    sample text in that language
  # options:: passed through unchanged to Profile#learn
  #
  # Returns whatever Profile#learn returns.
  def self.learn(lang, text, options = {})
    profile = (@profiles[lang.to_s] ||= Profile.new)
    profile.learn(text, options)
  end

  # Forget every learned or loaded profile.
  def self.clear_profiles
    @profiles = {}
  end

  # Guess the language of +source+.
  #
  # Returns the key of the profile with the smallest distance (the first one
  # wins on ties), or nil when no profile is known. +options+ is accepted for
  # interface compatibility but is currently not used.
  def self.guess(source, options = {})
    closest = distances(source).min_by { |_lang, distance| distance }
    closest.first if closest
  end

  # Distance of +text+ to every known profile.
  #
  # Returns an array of [language, distance] pairs.
  def self.distances(text)
    source = Profile.new.learn(text)
    @profiles.map { |lang, target| [lang, source.distance(target)] }
  end

  # NOTE: the methods below are public. A bare `private` has no effect on
  # methods defined with `def self.`, so it was dropped to avoid suggesting
  # otherwise; hiding them now could break existing callers.

  # Path of the profile file for +lang+ inside +dir+.
  def self.file_name(dir, lang)
    File.join(dir, "profile_#{lang}.yml")
  end

  # Load every profile_<lang>.yml file found in a directory.
  #
  # options[:directory]:: directory to read from (defaults to PROFILE_DIR)
  #
  # Files whose name does not follow the profile_<lang>.yml pattern are
  # skipped. The language is taken from the file name only, so a directory
  # name that happens to contain "profile_" cannot leak into the key.
  #
  # SECURITY: profiles written by save_profiles are presumably serialized Ruby
  # objects, which the safe loader behind YAML.load_file on newer Psych
  # versions rejects. The unrestricted loader is therefore used when it is
  # available -- only load profiles from directories you trust.
  def self.load_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    loader = YAML.respond_to?(:unsafe_load_file) ? :unsafe_load_file : :load_file
    Dir[File.join(dir, '*.yml')].each do |file|
      match = /\Aprofile_(.+)\.yml\z/.match(File.basename(file))
      next unless match
      @profiles[match[1]] = YAML.send(loader, file)
    end
  end

  # Write every known profile to <directory>/profile_<lang>.yml.
  #
  # options[:directory]:: target directory (defaults to PROFILE_DIR)
  # options[:limit]::     when the key is present, Profile#limit is called
  #                       with this value before the profile is written
  def self.save_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    @profiles.each do |lang, profile|
      profile.limit(options[:limit]) if options.has_key?(:limit)
      File.open(file_name(dir, lang), 'wb') do |file|
        file.write(profile.ya2yaml)
      end
    end
  end
end
@@ -0,0 +1,94 @@
module Babel
  # Statistical n-gram profile of one or more texts.
  #
  # @profile maps each n-gram to a two element array
  # [occurrences, ranking]. Despite its name, the "ranking" slot holds the
  # n-gram's relative frequency (occurrences / total occurrences) once #rank
  # has run, and 0 before that.
  #
  # NOTE: do not rename the instance variables. Profiles are serialized by
  # Babel.save_profiles, so the names are presumably part of the stored files.
  class Profile
    def initialize()
      @profile = {}
      @total_occurences = 0
    end

    # Add the n-grams of every whitespace separated word of +text+ to this
    # profile and re-rank it.
    #
    # options:: passed to String#ngrams; defaults to n-grams of length 2..5
    #           taken from the padded word
    #
    # Returns self so calls can be chained:
    #   profile.learn('asasas').learn('asdsad')
    def learn(text, options = {})
      options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
      clean(text).split(' ').each do |word|
        word.ngrams(options).each { |ngram| occured(ngram) }
      end
      rank
      self
    end

    # Hook for normalizing text before n-grams are extracted.
    #
    # Currently returns +text+ unchanged. The previous body stripped
    # punctuation (? . ; : ( ) / +) and digits, but sat behind an
    # unconditional return and never ran.
    # TODO: needed?
    def clean(text)
      text
    end

    # Limit this profile to the +boundary+ most frequent n-grams (ties are
    # broken alphabetically so the result is deterministic).
    # The profile needs to be ranked first.
    # Do not use this if you plan to extend the profile later on.
    #
    # Raises RuntimeError when an n-gram has not been ranked yet.
    # Returns self.
    def limit(boundary = 100)
      @profile.each_value do |stats|
        raise 'Please call rank() first' if stats.last == 0
      end
      # The ranking slot holds a relative frequency (see #rank), so it cannot
      # be compared with an item count; order by occurrence count instead.
      by_frequency = @profile.keys.sort_by { |ngram| [-@profile[ngram].first, ngram] }
      by_frequency.drop(boundary).each { |ngram| @profile.delete(ngram) }
      self
    end

    # Rank the current profile: store every n-gram's relative frequency
    # (its occurrences divided by the total number of occurrences).
    def rank
      @profile.values.each do |stats|
        stats[1] = stats[0] / @total_occurences.to_f
      end
    end

    # Called when a ngram occurred; optionally pass an
    # amount (how many times the ngram occurred).
    def occured(ngram, amount = 1)
      (@profile[ngram] ||= [0, 0])[0] += amount
      @total_occurences += amount
    end

    # Number of occurrences of a ngram. If it never occurred, returns 0.
    def occurence(ngram)
      stats = @profile[ngram]
      stats ? stats.first : 0
    end

    # Ranking (relative frequency) of a ngram. Returns 0 if the ngram is
    # unknown or the profile has not been ranked since it was added.
    def ranking(ngram)
      stats = @profile[ngram]
      stats ? stats.last : 0
    end

    # Distance to another profile: for every n-gram of this profile add the
    # absolute difference of both rankings, or 1 when +other+ reports a
    # ranking of 0 for it. Returns 0 for an empty profile.
    def distance(other)
      @profile.inject(0) do |total, (ngram, stats)|
        other_ranking = other.ranking(ngram)
        total + (other_ranking == 0 ? 1 : (other_ranking - stats.last).abs)
      end
    end

    def to_s
      @profile.inspect
    end
  end
end
@@ -0,0 +1,42 @@
# Babel extensions to String: n-gram extraction and shortcuts to Babel.
class String

  # All substrings (n-grams) of this string, ordered by start position and
  # then by increasing length.
  #
  # options[:min_length]::    shortest n-gram to return (default 1)
  # options[:max_length]::    longest n-gram to return (default: length of
  #                           the unpadded string)
  # options[:pad]::           when truthy, pad with one leading '_' and
  #                           (length - 1) trailing '_' before extracting
  # options[:preserve_case]:: when truthy, do not downcase first
  #
  # Returns an array of strings; an empty string yields an empty array
  # (also when padding is requested, which used to raise ArgumentError).
  def ngrams(options = {})
    min_length = options[:min_length] || 1
    max_length = options[:max_length] || self.length
    value = options[:preserve_case] ? self : self.downcase
    return [] if value.empty?
    value = "_#{value}#{'_' * (value.length - 1)}" if options[:pad]

    # A substring always has at least one character.
    shortest = [min_length, 1].max
    res = []
    0.upto(value.length - 1) do |start|
      # Only walk the lengths that can actually be returned from here.
      longest = [max_length, value.length - start].min
      shortest.upto(longest) { |size| res << value[start, size] }
    end
    res
  end

  # def byte_grams(options = {})
  #   min_length = options[:min_length] || 1
  #   max_length = options[:max_length] || self.length
  #   value = options[:preserve_case] ? self : self.downcase
  #   res = []
  #
  # end


  # Ask Babel about the language of this text
  # Can return nil if no language found
  def language(options = {})
    Babel.guess(self, options)
  end

  # Tell Babel that this text is in a given language.
  # Assignment syntax ('text'.language = 'en') can only pass +lang+; use
  # send(:language=, lang, options) to pass options as well.
  def language=(lang, options = {})
    Babel.learn(lang, self, options)
  end
end
data/lib/babel.rb ADDED
@@ -0,0 +1,10 @@
1
+ if RUBY_VERSION < '1.9'
2
+ require 'jcode'
3
+ $KCODE = 'u'
4
+ end
5
+
6
+ require File.dirname(__FILE__) + '/babel/string_extensions'
7
+ require File.dirname(__FILE__) + '/babel/babel'
8
+ require File.dirname(__FILE__) + '/babel/profile'
9
+
10
+ require 'ya2yaml'