simplificator-babel 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +20 -0
- data/README.markdown +19 -0
- data/Rakefile +56 -0
- data/VERSION.yml +4 -0
- data/lib/babel/babel.rb +54 -0
- data/lib/babel/profile.rb +94 -0
- data/lib/babel/string_extensions.rb +42 -0
- data/lib/babel.rb +10 -0
- data/lib/data/udhr_txt/index.xml +385 -0
- data/lib/data/udhr_txt/udhr_007.txt +220 -0
- data/lib/data/udhr_txt/udhr_008.txt +220 -0
- data/lib/data/udhr_txt/udhr_009.txt +228 -0
- data/lib/data/udhr_txt/udhr_010.txt +219 -0
- data/lib/data/udhr_txt/udhr_011.txt +232 -0
- data/lib/data/udhr_txt/udhr_abk.txt +218 -0
- data/lib/data/udhr_txt/udhr_ace.txt +221 -0
- data/lib/data/udhr_txt/udhr_acu.txt +222 -0
- data/lib/data/udhr_txt/udhr_ada.txt +220 -0
- data/lib/data/udhr_txt/udhr_afr.txt +219 -0
- data/lib/data/udhr_txt/udhr_agr.txt +219 -0
- data/lib/data/udhr_txt/udhr_aii.txt +216 -0
- data/lib/data/udhr_txt/udhr_ajg.txt +219 -0
- data/lib/data/udhr_txt/udhr_aka_akuapem.txt +221 -0
- data/lib/data/udhr_txt/udhr_aka_asante.txt +220 -0
- data/lib/data/udhr_txt/udhr_aka_fante.txt +219 -0
- data/lib/data/udhr_txt/udhr_als.txt +220 -0
- data/lib/data/udhr_txt/udhr_amc.txt +215 -0
- data/lib/data/udhr_txt/udhr_ame.txt +222 -0
- data/lib/data/udhr_txt/udhr_amh.txt +209 -0
- data/lib/data/udhr_txt/udhr_amr.txt +221 -0
- data/lib/data/udhr_txt/udhr_arb.txt +220 -0
- data/lib/data/udhr_txt/udhr_arl.txt +222 -0
- data/lib/data/udhr_txt/udhr_arn.txt +218 -0
- data/lib/data/udhr_txt/udhr_ast.txt +221 -0
- data/lib/data/udhr_txt/udhr_auv.txt +217 -0
- data/lib/data/udhr_txt/udhr_ayr.txt +218 -0
- data/lib/data/udhr_txt/udhr_azj_cyrl.txt +218 -0
- data/lib/data/udhr_txt/udhr_azj_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_bam.txt +218 -0
- data/lib/data/udhr_txt/udhr_ban.txt +222 -0
- data/lib/data/udhr_txt/udhr_bba.txt +218 -0
- data/lib/data/udhr_txt/udhr_bci.txt +217 -0
- data/lib/data/udhr_txt/udhr_bcl.txt +219 -0
- data/lib/data/udhr_txt/udhr_bel.txt +221 -0
- data/lib/data/udhr_txt/udhr_bem.txt +217 -0
- data/lib/data/udhr_txt/udhr_ben.txt +222 -0
- data/lib/data/udhr_txt/udhr_bho.txt +219 -0
- data/lib/data/udhr_txt/udhr_bin.txt +232 -0
- data/lib/data/udhr_txt/udhr_bis.txt +218 -0
- data/lib/data/udhr_txt/udhr_blu.txt +219 -0
- data/lib/data/udhr_txt/udhr_boa.txt +223 -0
- data/lib/data/udhr_txt/udhr_bod.txt +221 -0
- data/lib/data/udhr_txt/udhr_bos_cyrl.txt +220 -0
- data/lib/data/udhr_txt/udhr_bos_latn.txt +220 -0
- data/lib/data/udhr_txt/udhr_bre.txt +222 -0
- data/lib/data/udhr_txt/udhr_btb.txt +217 -0
- data/lib/data/udhr_txt/udhr_bug.txt +222 -0
- data/lib/data/udhr_txt/udhr_bul.txt +218 -0
- data/lib/data/udhr_txt/udhr_cab.txt +221 -0
- data/lib/data/udhr_txt/udhr_cak.txt +217 -0
- data/lib/data/udhr_txt/udhr_cat.txt +220 -0
- data/lib/data/udhr_txt/udhr_cbr.txt +219 -0
- data/lib/data/udhr_txt/udhr_cbs.txt +153 -0
- data/lib/data/udhr_txt/udhr_cbt.txt +220 -0
- data/lib/data/udhr_txt/udhr_cbu.txt +218 -0
- data/lib/data/udhr_txt/udhr_ccx.txt +222 -0
- data/lib/data/udhr_txt/udhr_ceb.txt +218 -0
- data/lib/data/udhr_txt/udhr_ces.txt +221 -0
- data/lib/data/udhr_txt/udhr_cha.txt +219 -0
- data/lib/data/udhr_txt/udhr_chj.txt +220 -0
- data/lib/data/udhr_txt/udhr_chk.txt +220 -0
- data/lib/data/udhr_txt/udhr_chr.txt +10 -0
- data/lib/data/udhr_txt/udhr_cic.txt +220 -0
- data/lib/data/udhr_txt/udhr_cjk.txt +218 -0
- data/lib/data/udhr_txt/udhr_cjk_AO.txt +220 -0
- data/lib/data/udhr_txt/udhr_ckb.txt +217 -0
- data/lib/data/udhr_txt/udhr_cmn_hans.txt +220 -0
- data/lib/data/udhr_txt/udhr_cmn_hant.txt +220 -0
- data/lib/data/udhr_txt/udhr_cnh.txt +220 -0
- data/lib/data/udhr_txt/udhr_cni.txt +220 -0
- data/lib/data/udhr_txt/udhr_cos.txt +218 -0
- data/lib/data/udhr_txt/udhr_cot.txt +222 -0
- data/lib/data/udhr_txt/udhr_cpu.txt +219 -0
- data/lib/data/udhr_txt/udhr_crs.txt +217 -0
- data/lib/data/udhr_txt/udhr_csa.txt +223 -0
- data/lib/data/udhr_txt/udhr_csw.txt +163 -0
- data/lib/data/udhr_txt/udhr_ctd.txt +222 -0
- data/lib/data/udhr_txt/udhr_cym.txt +222 -0
- data/lib/data/udhr_txt/udhr_dag.txt +217 -0
- data/lib/data/udhr_txt/udhr_dan.txt +224 -0
- data/lib/data/udhr_txt/udhr_ddn.txt +217 -0
- data/lib/data/udhr_txt/udhr_deu_1901.txt +220 -0
- data/lib/data/udhr_txt/udhr_deu_1996.txt +220 -0
- data/lib/data/udhr_txt/udhr_dga.txt +220 -0
- data/lib/data/udhr_txt/udhr_dip.txt +217 -0
- data/lib/data/udhr_txt/udhr_div.txt +220 -0
- data/lib/data/udhr_txt/udhr_dyo.txt +217 -0
- data/lib/data/udhr_txt/udhr_dzo.txt +9 -0
- data/lib/data/udhr_txt/udhr_ell_monotonic.txt +220 -0
- data/lib/data/udhr_txt/udhr_ell_polytonic.txt +220 -0
- data/lib/data/udhr_txt/udhr_emk.txt +218 -0
- data/lib/data/udhr_txt/udhr_eml.txt +219 -0
- data/lib/data/udhr_txt/udhr_eng.txt +219 -0
- data/lib/data/udhr_txt/udhr_epo.txt +221 -0
- data/lib/data/udhr_txt/udhr_est.txt +219 -0
- data/lib/data/udhr_txt/udhr_eus.txt +220 -0
- data/lib/data/udhr_txt/udhr_eve.txt +207 -0
- data/lib/data/udhr_txt/udhr_ewe.txt +218 -0
- data/lib/data/udhr_txt/udhr_fao.txt +219 -0
- data/lib/data/udhr_txt/udhr_fij.txt +224 -0
- data/lib/data/udhr_txt/udhr_fin.txt +224 -0
- data/lib/data/udhr_txt/udhr_flm.txt +219 -0
- data/lib/data/udhr_txt/udhr_fon.txt +217 -0
- data/lib/data/udhr_txt/udhr_fra.txt +218 -0
- data/lib/data/udhr_txt/udhr_fri.txt +219 -0
- data/lib/data/udhr_txt/udhr_fuc.txt +217 -0
- data/lib/data/udhr_txt/udhr_fur.txt +220 -0
- data/lib/data/udhr_txt/udhr_gaa.txt +220 -0
- data/lib/data/udhr_txt/udhr_gag.txt +223 -0
- data/lib/data/udhr_txt/udhr_gax.txt +222 -0
- data/lib/data/udhr_txt/udhr_gjn.txt +220 -0
- data/lib/data/udhr_txt/udhr_gkp.txt +216 -0
- data/lib/data/udhr_txt/udhr_gla.txt +229 -0
- data/lib/data/udhr_txt/udhr_gle.txt +215 -0
- data/lib/data/udhr_txt/udhr_glg.txt +217 -0
- data/lib/data/udhr_txt/udhr_guc.txt +221 -0
- data/lib/data/udhr_txt/udhr_gug.txt +210 -0
- data/lib/data/udhr_txt/udhr_guj.txt +219 -0
- data/lib/data/udhr_txt/udhr_gyr.txt +203 -0
- data/lib/data/udhr_txt/udhr_hat_kreyol.txt +221 -0
- data/lib/data/udhr_txt/udhr_hat_popular.txt +221 -0
- data/lib/data/udhr_txt/udhr_hau_NE.txt +219 -0
- data/lib/data/udhr_txt/udhr_hau_NG.txt +219 -0
- data/lib/data/udhr_txt/udhr_haw.txt +219 -0
- data/lib/data/udhr_txt/udhr_hea.txt +219 -0
- data/lib/data/udhr_txt/udhr_heb.txt +216 -0
- data/lib/data/udhr_txt/udhr_hil.txt +217 -0
- data/lib/data/udhr_txt/udhr_hin.txt +222 -0
- data/lib/data/udhr_txt/udhr_hms.txt +219 -0
- data/lib/data/udhr_txt/udhr_hna.txt +217 -0
- data/lib/data/udhr_txt/udhr_hni.txt +218 -0
- data/lib/data/udhr_txt/udhr_hrv.txt +218 -0
- data/lib/data/udhr_txt/udhr_hsb.txt +220 -0
- data/lib/data/udhr_txt/udhr_hun.txt +218 -0
- data/lib/data/udhr_txt/udhr_hus.txt +222 -0
- data/lib/data/udhr_txt/udhr_huu.txt +220 -0
- data/lib/data/udhr_txt/udhr_hva.txt +220 -0
- data/lib/data/udhr_txt/udhr_hye.txt +234 -0
- data/lib/data/udhr_txt/udhr_ibb.txt +235 -0
- data/lib/data/udhr_txt/udhr_ibo.txt +219 -0
- data/lib/data/udhr_txt/udhr_ido.txt +224 -0
- data/lib/data/udhr_txt/udhr_iii.txt +9 -0
- data/lib/data/udhr_txt/udhr_ike.txt +163 -0
- data/lib/data/udhr_txt/udhr_ilo.txt +217 -0
- data/lib/data/udhr_txt/udhr_ina.txt +220 -0
- data/lib/data/udhr_txt/udhr_ind.txt +219 -0
- data/lib/data/udhr_txt/udhr_isl.txt +217 -0
- data/lib/data/udhr_txt/udhr_ita.txt +221 -0
- data/lib/data/udhr_txt/udhr_jav.txt +222 -0
- data/lib/data/udhr_txt/udhr_jpn.txt +219 -0
- data/lib/data/udhr_txt/udhr_kal.txt +218 -0
- data/lib/data/udhr_txt/udhr_kan.txt +216 -0
- data/lib/data/udhr_txt/udhr_kat.txt +221 -0
- data/lib/data/udhr_txt/udhr_kaz.txt +218 -0
- data/lib/data/udhr_txt/udhr_kbp.txt +218 -0
- data/lib/data/udhr_txt/udhr_kde.txt +212 -0
- data/lib/data/udhr_txt/udhr_kea.txt +219 -0
- data/lib/data/udhr_txt/udhr_kek.txt +219 -0
- data/lib/data/udhr_txt/udhr_khk.txt +217 -0
- data/lib/data/udhr_txt/udhr_khk_mong.txt +11 -0
- data/lib/data/udhr_txt/udhr_khm.txt +220 -0
- data/lib/data/udhr_txt/udhr_kin.txt +220 -0
- data/lib/data/udhr_txt/udhr_kir.txt +220 -0
- data/lib/data/udhr_txt/udhr_kmb.txt +219 -0
- data/lib/data/udhr_txt/udhr_knc.txt +230 -0
- data/lib/data/udhr_txt/udhr_kng.txt +219 -0
- data/lib/data/udhr_txt/udhr_kng_AO.txt +219 -0
- data/lib/data/udhr_txt/udhr_koo.txt +216 -0
- data/lib/data/udhr_txt/udhr_kor.txt +219 -0
- data/lib/data/udhr_txt/udhr_kqn.txt +218 -0
- data/lib/data/udhr_txt/udhr_kri.txt +226 -0
- data/lib/data/udhr_txt/udhr_ktu.txt +219 -0
- data/lib/data/udhr_txt/udhr_lao.txt +223 -0
- data/lib/data/udhr_txt/udhr_lat.txt +221 -0
- data/lib/data/udhr_txt/udhr_lat_1.txt +220 -0
- data/lib/data/udhr_txt/udhr_lav.txt +220 -0
- data/lib/data/udhr_txt/udhr_lia.txt +218 -0
- data/lib/data/udhr_txt/udhr_lin.txt +217 -0
- data/lib/data/udhr_txt/udhr_lin_tones.txt +214 -0
- data/lib/data/udhr_txt/udhr_lit.txt +218 -0
- data/lib/data/udhr_txt/udhr_lnc.txt +219 -0
- data/lib/data/udhr_txt/udhr_lns.txt +219 -0
- data/lib/data/udhr_txt/udhr_loz.txt +219 -0
- data/lib/data/udhr_txt/udhr_ltz.txt +218 -0
- data/lib/data/udhr_txt/udhr_lua.txt +219 -0
- data/lib/data/udhr_txt/udhr_lue.txt +217 -0
- data/lib/data/udhr_txt/udhr_lug.txt +216 -0
- data/lib/data/udhr_txt/udhr_lun.txt +216 -0
- data/lib/data/udhr_txt/udhr_mad.txt +223 -0
- data/lib/data/udhr_txt/udhr_mag.txt +220 -0
- data/lib/data/udhr_txt/udhr_mah.txt +220 -0
- data/lib/data/udhr_txt/udhr_mai.txt +223 -0
- data/lib/data/udhr_txt/udhr_mal.txt +210 -0
- data/lib/data/udhr_txt/udhr_mam.txt +218 -0
- data/lib/data/udhr_txt/udhr_mar.txt +219 -0
- data/lib/data/udhr_txt/udhr_maz.txt +218 -0
- data/lib/data/udhr_txt/udhr_mcd.txt +220 -0
- data/lib/data/udhr_txt/udhr_mcf.txt +223 -0
- data/lib/data/udhr_txt/udhr_men.txt +222 -0
- data/lib/data/udhr_txt/udhr_mic.txt +218 -0
- data/lib/data/udhr_txt/udhr_min.txt +221 -0
- data/lib/data/udhr_txt/udhr_miq.txt +213 -0
- data/lib/data/udhr_txt/udhr_mkd.txt +221 -0
- data/lib/data/udhr_txt/udhr_mlt.txt +217 -0
- data/lib/data/udhr_txt/udhr_mly_arab.txt +219 -0
- data/lib/data/udhr_txt/udhr_mly_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_mos.txt +216 -0
- data/lib/data/udhr_txt/udhr_mri.txt +219 -0
- data/lib/data/udhr_txt/udhr_mxi.txt +218 -0
- data/lib/data/udhr_txt/udhr_mxv.txt +223 -0
- data/lib/data/udhr_txt/udhr_mya.txt +219 -0
- data/lib/data/udhr_txt/udhr_mzi.txt +227 -0
- data/lib/data/udhr_txt/udhr_nav.txt +219 -0
- data/lib/data/udhr_txt/udhr_nba.txt +257 -0
- data/lib/data/udhr_txt/udhr_nbl.txt +218 -0
- data/lib/data/udhr_txt/udhr_ndo.txt +217 -0
- data/lib/data/udhr_txt/udhr_nep.txt +214 -0
- data/lib/data/udhr_txt/udhr_nhn.txt +221 -0
- data/lib/data/udhr_txt/udhr_nld.txt +217 -0
- data/lib/data/udhr_txt/udhr_nno.txt +219 -0
- data/lib/data/udhr_txt/udhr_nob.txt +225 -0
- data/lib/data/udhr_txt/udhr_not.txt +218 -0
- data/lib/data/udhr_txt/udhr_nso.txt +219 -0
- data/lib/data/udhr_txt/udhr_nya_chechewa.txt +221 -0
- data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +218 -0
- data/lib/data/udhr_txt/udhr_nym.txt +229 -0
- data/lib/data/udhr_txt/udhr_nyn.txt +213 -0
- data/lib/data/udhr_txt/udhr_nzi.txt +221 -0
- data/lib/data/udhr_txt/udhr_ojb.txt +221 -0
- data/lib/data/udhr_txt/udhr_oss.txt +214 -0
- data/lib/data/udhr_txt/udhr_ote.txt +218 -0
- data/lib/data/udhr_txt/udhr_pam.txt +225 -0
- data/lib/data/udhr_txt/udhr_pan.txt +227 -0
- data/lib/data/udhr_txt/udhr_pau.txt +219 -0
- data/lib/data/udhr_txt/udhr_pbb.txt +218 -0
- data/lib/data/udhr_txt/udhr_pbu.txt +9 -0
- data/lib/data/udhr_txt/udhr_pcd.txt +218 -0
- data/lib/data/udhr_txt/udhr_pcm.txt +218 -0
- data/lib/data/udhr_txt/udhr_pes_1.txt +218 -0
- data/lib/data/udhr_txt/udhr_pes_2.txt +222 -0
- data/lib/data/udhr_txt/udhr_pis.txt +219 -0
- data/lib/data/udhr_txt/udhr_plt.txt +214 -0
- data/lib/data/udhr_txt/udhr_pnb.txt +223 -0
- data/lib/data/udhr_txt/udhr_pol.txt +220 -0
- data/lib/data/udhr_txt/udhr_pon.txt +218 -0
- data/lib/data/udhr_txt/udhr_por_BR.txt +231 -0
- data/lib/data/udhr_txt/udhr_por_PT.txt +219 -0
- data/lib/data/udhr_txt/udhr_pov.txt +220 -0
- data/lib/data/udhr_txt/udhr_ppl.txt +219 -0
- data/lib/data/udhr_txt/udhr_prq.txt +151 -0
- data/lib/data/udhr_txt/udhr_prv.txt +207 -0
- data/lib/data/udhr_txt/udhr_quc.txt +217 -0
- data/lib/data/udhr_txt/udhr_qud.txt +218 -0
- data/lib/data/udhr_txt/udhr_quy.txt +221 -0
- data/lib/data/udhr_txt/udhr_quz.txt +223 -0
- data/lib/data/udhr_txt/udhr_qva.txt +219 -0
- data/lib/data/udhr_txt/udhr_qvc.txt +218 -0
- data/lib/data/udhr_txt/udhr_qvh.txt +217 -0
- data/lib/data/udhr_txt/udhr_qvm.txt +219 -0
- data/lib/data/udhr_txt/udhr_qvn.txt +217 -0
- data/lib/data/udhr_txt/udhr_qwh.txt +218 -0
- data/lib/data/udhr_txt/udhr_qxa.txt +217 -0
- data/lib/data/udhr_txt/udhr_qxn.txt +216 -0
- data/lib/data/udhr_txt/udhr_qxu.txt +221 -0
- data/lib/data/udhr_txt/udhr_rar.txt +220 -0
- data/lib/data/udhr_txt/udhr_rmn.txt +220 -0
- data/lib/data/udhr_txt/udhr_rmn_1.txt +221 -0
- data/lib/data/udhr_txt/udhr_rmy.txt +218 -0
- data/lib/data/udhr_txt/udhr_roh.txt +217 -0
- data/lib/data/udhr_txt/udhr_ron_1953.txt +218 -0
- data/lib/data/udhr_txt/udhr_ron_1993.txt +218 -0
- data/lib/data/udhr_txt/udhr_ron_2006.txt +218 -0
- data/lib/data/udhr_txt/udhr_run.txt +218 -0
- data/lib/data/udhr_txt/udhr_rus.txt +220 -0
- data/lib/data/udhr_txt/udhr_sag.txt +220 -0
- data/lib/data/udhr_txt/udhr_san.txt +219 -0
- data/lib/data/udhr_txt/udhr_sco.txt +222 -0
- data/lib/data/udhr_txt/udhr_shp.txt +224 -0
- data/lib/data/udhr_txt/udhr_skr.txt +225 -0
- data/lib/data/udhr_txt/udhr_slk.txt +219 -0
- data/lib/data/udhr_txt/udhr_slv.txt +218 -0
- data/lib/data/udhr_txt/udhr_sme.txt +220 -0
- data/lib/data/udhr_txt/udhr_smo.txt +226 -0
- data/lib/data/udhr_txt/udhr_sna.txt +223 -0
- data/lib/data/udhr_txt/udhr_snk.txt +220 -0
- data/lib/data/udhr_txt/udhr_som.txt +216 -0
- data/lib/data/udhr_txt/udhr_sot.txt +220 -0
- data/lib/data/udhr_txt/udhr_spa.txt +220 -0
- data/lib/data/udhr_txt/udhr_src.txt +220 -0
- data/lib/data/udhr_txt/udhr_srp_cyrl.txt +218 -0
- data/lib/data/udhr_txt/udhr_srp_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_srr.txt +219 -0
- data/lib/data/udhr_txt/udhr_ssw.txt +228 -0
- data/lib/data/udhr_txt/udhr_suk.txt +218 -0
- data/lib/data/udhr_txt/udhr_sun.txt +227 -0
- data/lib/data/udhr_txt/udhr_sus.txt +218 -0
- data/lib/data/udhr_txt/udhr_swe.txt +224 -0
- data/lib/data/udhr_txt/udhr_swh.txt +221 -0
- data/lib/data/udhr_txt/udhr_tah.txt +217 -0
- data/lib/data/udhr_txt/udhr_taj.txt +10 -0
- data/lib/data/udhr_txt/udhr_tam.txt +227 -0
- data/lib/data/udhr_txt/udhr_tat.txt +219 -0
- data/lib/data/udhr_txt/udhr_tbz.txt +219 -0
- data/lib/data/udhr_txt/udhr_tca.txt +219 -0
- data/lib/data/udhr_txt/udhr_tem.txt +216 -0
- data/lib/data/udhr_txt/udhr_tet.txt +219 -0
- data/lib/data/udhr_txt/udhr_tgk.txt +217 -0
- data/lib/data/udhr_txt/udhr_tgl.txt +224 -0
- data/lib/data/udhr_txt/udhr_tgl_tglg.txt +9 -0
- data/lib/data/udhr_txt/udhr_tha.txt +217 -0
- data/lib/data/udhr_txt/udhr_tir.txt +217 -0
- data/lib/data/udhr_txt/udhr_tiv.txt +232 -0
- data/lib/data/udhr_txt/udhr_tob.txt +218 -0
- data/lib/data/udhr_txt/udhr_toi.txt +216 -0
- data/lib/data/udhr_txt/udhr_toj.txt +219 -0
- data/lib/data/udhr_txt/udhr_ton.txt +221 -0
- data/lib/data/udhr_txt/udhr_top.txt +220 -0
- data/lib/data/udhr_txt/udhr_tpi.txt +219 -0
- data/lib/data/udhr_txt/udhr_tsn.txt +219 -0
- data/lib/data/udhr_txt/udhr_tso_MZ.txt +220 -0
- data/lib/data/udhr_txt/udhr_tsz.txt +218 -0
- data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +216 -0
- data/lib/data/udhr_txt/udhr_tuk_latn.txt +221 -0
- data/lib/data/udhr_txt/udhr_tur.txt +219 -0
- data/lib/data/udhr_txt/udhr_tzc.txt +219 -0
- data/lib/data/udhr_txt/udhr_tzh.txt +218 -0
- data/lib/data/udhr_txt/udhr_tzm.txt +220 -0
- data/lib/data/udhr_txt/udhr_tzm_tfng.txt +9 -0
- data/lib/data/udhr_txt/udhr_uig_arab.txt +219 -0
- data/lib/data/udhr_txt/udhr_uig_latn.txt +219 -0
- data/lib/data/udhr_txt/udhr_ukr.txt +218 -0
- data/lib/data/udhr_txt/udhr_umb.txt +218 -0
- data/lib/data/udhr_txt/udhr_ura.txt +219 -0
- data/lib/data/udhr_txt/udhr_urd.txt +9 -0
- data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +220 -0
- data/lib/data/udhr_txt/udhr_uzn_latn.txt +220 -0
- data/lib/data/udhr_txt/udhr_vai.txt +224 -0
- data/lib/data/udhr_txt/udhr_vie.txt +221 -0
- data/lib/data/udhr_txt/udhr_vmw.txt +220 -0
- data/lib/data/udhr_txt/udhr_war.txt +219 -0
- data/lib/data/udhr_txt/udhr_wln.txt +220 -0
- data/lib/data/udhr_txt/udhr_wol.txt +219 -0
- data/lib/data/udhr_txt/udhr_wwa.txt +109 -0
- data/lib/data/udhr_txt/udhr_xho.txt +219 -0
- data/lib/data/udhr_txt/udhr_xsm.txt +219 -0
- data/lib/data/udhr_txt/udhr_yad.txt +220 -0
- data/lib/data/udhr_txt/udhr_yao.txt +214 -0
- data/lib/data/udhr_txt/udhr_yap.txt +220 -0
- data/lib/data/udhr_txt/udhr_ydd.txt +223 -0
- data/lib/data/udhr_txt/udhr_ykg.txt +211 -0
- data/lib/data/udhr_txt/udhr_yor.txt +218 -0
- data/lib/data/udhr_txt/udhr_yua.txt +218 -0
- data/lib/data/udhr_txt/udhr_zam.txt +223 -0
- data/lib/data/udhr_txt/udhr_ztu.txt +219 -0
- data/lib/data/udhr_txt/udhr_zul.txt +219 -0
- data/lib/profiles/profile_deu_1996.yml +25362 -0
- data/lib/profiles/profile_eng.yml +20794 -0
- data/lib/profiles/profile_fra.yml +24964 -0
- data/lib/profiles/profile_spa.yml +23020 -0
- data/test/babel_test.rb +44 -0
- data/test/profile_test.rb +105 -0
- data/test/string_extensions_test.rb +43 -0
- data/test/test_helper.rb +10 -0
- data/test/train.rb +26 -0
- metadata +440 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Simplificator GmbH
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
#babel
|
2
|
+
|
3
|
+
Babel is a gem to identify in what language a text is written.
|
4
|
+
It is based on the n-gram approach by Cavnar and Trenkle as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
|
5
|
+
|
6
|
+
|
7
|
+
##usage
|
8
|
+
require 'rubygems'
|
9
|
+
require 'simplificator-babel'
|
10
|
+
|
11
|
+
# Train babel: feed it some texts
|
12
|
+
'An english text to train and learn'.language= 'en'
|
13
|
+
'Ein deutscher Text'.language= 'de'
|
14
|
+
|
15
|
+
puts 'Noch ein deutscher Text'.language
|
16
|
+
|
17
|
+
##Copyright
|
18
|
+
|
19
|
+
Copyright (c) 2009 Simplificator GmbH. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Build, test, coverage and documentation tasks for the babel gem.
require 'rubygems'
require 'rake'
# YAML is used further down to read VERSION.yml for the rdoc title; require it
# explicitly instead of relying on another library having loaded it already.
require 'yaml'

# Gem packaging tasks come from Jeweler; without it only a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "babel"
    gem.summary = %Q{Utility to guess the language of a text}
    gem.email = "info@simplificator.com"
    gem.homepage = "http://github.com/simplificator/babel"
    gem.authors = ["simplificator"]
    gem.add_dependency('ya2yaml', '>= 0.2.6')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# `rake test` runs every test/**/*_test.rb with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# `rake rcov` measures coverage when RCov is installed; otherwise the task
# aborts with installation instructions.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# `rake rdoc` builds the API documentation into ./rdoc, titled with the
# version read from VERSION.yml when that file exists.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "babel #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
56
|
+
|
data/VERSION.yml
ADDED
data/lib/babel/babel.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# Registry of per-language n-gram profiles together with the entry points to
# train them (learn), query them (guess, distances) and persist them
# (load_profiles, save_profiles).
module Babel
  # Known profiles, keyed by language name. Keys are always Strings because
  # learn converts the language with to_s and load_profiles takes them from
  # file names.
  @profiles = {}

  # Default directory for the profile_<lang>.yml files.
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')

  # Add +text+ to the profile of +lang+, creating that profile on first use.
  # +lang+ may be anything responding to to_s; +options+ are passed on to
  # Profile#learn unchanged. Returns whatever Profile#learn returns.
  def self.learn(lang, text, options = {})
    lang = lang.to_s
    profile = @profiles[lang] ||= Profile.new
    profile.learn(text, options)
  end

  # Forget every profile learned or loaded so far.
  def self.clear_profiles
    @profiles = {}
  end

  # Return the language whose profile has the smallest distance to +source+,
  # or nil when no profiles are known. On equal distances the profile that
  # comes first in the registry wins. +options+ is accepted but not used.
  def self.guess(source, options = {})
    closest = Babel.distances(source).min_by { |_lang, distance| distance }
    closest.first if closest
  end

  # Build a throw-away profile from +text+ and measure it against every known
  # profile. Returns an array of [language, distance] pairs.
  def self.distances(text)
    source = Profile.new.learn(text)
    @profiles.map { |lang, target| [lang, source.distance(target)] }
  end

  # The helpers below used to follow a bare `private`, which has no effect on
  # methods defined with `def self.`; they have therefore always been callable
  # from outside and stay that way.

  # Path of the profile file for +lang+ inside +dir+.
  def self.file_name(dir, lang)
    File.join(dir, "profile_#{lang}.yml")
  end

  # Load every profile_<lang>.yml found in options[:directory] (default:
  # PROFILE_DIR) and register it under <lang>, replacing a profile of the same
  # name. Files that do not follow the naming scheme are ignored.
  def self.load_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    Dir[File.join(dir, 'profile_*.yml')].each do |file|
      # Derive the language from the file name only, so that directory names
      # can never leak into the key.
      lang = File.basename(file)[/\Aprofile_(.+)\.yml\z/, 1]
      next unless lang
      # NOTE(review): YAML.load_file deserialises whatever the file contains;
      # only point this at directories with trusted content.
      @profiles[lang] = YAML.load_file(file)
    end
  end

  # Write every known profile to options[:directory] (default: PROFILE_DIR),
  # one profile_<lang>.yml per language. When options contains :limit, each
  # profile is trimmed with Profile#limit before it is written.
  def self.save_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    @profiles.each do |lang, profile|
      profile.limit(options[:limit]) if options.key?(:limit)
      File.open(file_name(dir, lang), 'wb') do |file|
        file.write(profile.ya2yaml)
      end
    end
  end
end
|
54
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Babel
  # N-gram statistics collected from the text of one language.
  #
  # @profile maps every n-gram to a two element array
  # [occurrence count, relative frequency]. The second element is 0 until
  # rank has been called. @total_occurences is the sum of all counts.
  class Profile
    def initialize
      @profile = {}
      @total_occurences = 0
    end

    # Split +text+ on spaces, count the n-grams of every word and refresh the
    # relative frequencies. +options+ are merged over the defaults
    # (:min_length => 2, :max_length => 5, :pad => true) and handed to
    # String#ngrams. Returns self so that learn commands can be chained:
    # profile.learn('asasas').learn('asdsad')
    def learn(text, options = {})
      options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
      clean(text).split(' ').each do |word|
        word.ngrams(options).each { |ngram| occured(ngram) }
      end
      rank
      self
    end

    # Hook for normalising text before it is split into words.
    # TODO: needed? It currently returns +text+ untouched. A draft that
    # stripped ? . ; : ( ) / + and digits sat behind an early return and was
    # never executed, so punctuation still ends up in the n-grams.
    def clean(text)
      text
    end

    # Limit this profile to the +boundary+ most frequent n-grams.
    # The profile needs to be ranked first; a RuntimeError is raised when an
    # entry has not been ranked yet.
    # Do not use this if you plan to extend the profile later on.
    #
    # Entries are ordered by occurrence count; equal counts are ordered by
    # n-gram so that the outcome is deterministic. Returns the result of
    # Hash#reject! (nil when nothing had to be removed).
    def limit(boundary = 100)
      @profile.each_value do |value|
        raise 'Please call rank() first' if value.last == 0
      end
      kept = {}
      @profile.sort_by { |ngram, value| [-value.first, ngram] }.first(boundary).each do |ngram, _value|
        kept[ngram] = true
      end
      @profile.reject! { |ngram, _value| !kept.key?(ngram) }
    end

    # Rank the current profile: every n-gram gets its relative frequency
    # (occurrence count divided by the total number of occurrences) as its
    # ranking value.
    def rank
      @profile.values.each do |value|
        value[1] = value[0] / @total_occurences.to_f
      end
    end

    # Called when a ngram occurred; optionally you can pass an
    # amount (how many times the ngram occurred).
    def occured(ngram, amount = 1)
      (@profile[ngram] ||= [0, 0])[0] += amount
      @total_occurences += amount
    end

    # Find the occurrence count of a ngram. If it never occurred, returns 0.
    def occurence(ngram)
      @profile[ngram] ? @profile[ngram].first : 0
    end

    # Find the ranking value of a ngram. If the ngram is unknown or not yet
    # ranked, returns 0.
    def ranking(ngram)
      @profile[ngram] ? @profile[ngram].last : 0
    end

    # Calculate the distance to another profile: for every n-gram of this
    # profile add the absolute difference of the two ranking values, or 1 when
    # the other profile reports a ranking of 0 for it (unknown or unranked).
    def distance(other)
      @profile.inject(0) do |memo, (ngram, stats)|
        other_ranking = other.ranking(ngram)
        memo + (other_ranking == 0 ? 1 : (other_ranking - stats.last).abs)
      end
    end

    def to_s
      @profile.inspect
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
class String
|
2
|
+
|
3
|
+
# TODO: recursive?
|
4
|
+
def ngrams(options = {})
|
5
|
+
min_length = options[:min_length] || 1
|
6
|
+
max_length = options[:max_length] || self.length
|
7
|
+
pad = options[:pad] || false
|
8
|
+
value = options[:preserve_case] ? self : self.downcase
|
9
|
+
value = "_#{value}#{'_' * (value.length - 1)}" if pad
|
10
|
+
res = []
|
11
|
+
# TODO: use min/max length for loop index instead of looping
|
12
|
+
# all and then use if test to decide if to add or not
|
13
|
+
0.upto(value.length - 1) do |index|
|
14
|
+
index.upto(value.length - 1) do |len|
|
15
|
+
if value[index..len].length >= min_length && value[index..len].length <= max_length
|
16
|
+
res << value[index..len]
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
res
|
21
|
+
end
|
22
|
+
|
23
|
+
# def byte_grams(options = {})
|
24
|
+
# min_length = options[:min_length] || 1
|
25
|
+
# max_length = options[:max_length] || self.length
|
26
|
+
# value = options[:preserve_case] ? self : self.downcase
|
27
|
+
# res = []
|
28
|
+
#
|
29
|
+
# end
|
30
|
+
|
31
|
+
|
32
|
+
# Ask Babel about the language of this text
|
33
|
+
# Can return nil if no language found
|
34
|
+
def language(options = {})
|
35
|
+
Babel.guess(self, options)
|
36
|
+
end
|
37
|
+
|
38
|
+
# Tell Babel that this text is in a given language
|
39
|
+
def language=(lang, options = {})
|
40
|
+
Babel.learn(lang, self, options)
|
41
|
+
end
|
42
|
+
end
|
data/lib/babel.rb
ADDED