simplificator-babel 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.markdown +19 -0
- data/Rakefile +56 -0
- data/VERSION.yml +4 -0
- data/lib/babel/babel.rb +54 -0
- data/lib/babel/profile.rb +94 -0
- data/lib/babel/string_extensions.rb +42 -0
- data/lib/babel.rb +10 -0
- data/lib/data/udhr_txt/index.xml +385 -0
- data/lib/data/udhr_txt/udhr_007.txt +220 -0
- data/lib/data/udhr_txt/udhr_008.txt +220 -0
- data/lib/data/udhr_txt/udhr_009.txt +228 -0
- data/lib/data/udhr_txt/udhr_010.txt +219 -0
- data/lib/data/udhr_txt/udhr_011.txt +232 -0
- data/lib/data/udhr_txt/udhr_abk.txt +218 -0
- data/lib/data/udhr_txt/udhr_ace.txt +221 -0
- data/lib/data/udhr_txt/udhr_acu.txt +222 -0
- data/lib/data/udhr_txt/udhr_ada.txt +220 -0
- data/lib/data/udhr_txt/udhr_afr.txt +219 -0
- data/lib/data/udhr_txt/udhr_agr.txt +219 -0
- data/lib/data/udhr_txt/udhr_aii.txt +216 -0
- data/lib/data/udhr_txt/udhr_ajg.txt +219 -0
- data/lib/data/udhr_txt/udhr_aka_akuapem.txt +221 -0
- data/lib/data/udhr_txt/udhr_aka_asante.txt +220 -0
- data/lib/data/udhr_txt/udhr_aka_fante.txt +219 -0
- data/lib/data/udhr_txt/udhr_als.txt +220 -0
- data/lib/data/udhr_txt/udhr_amc.txt +215 -0
- data/lib/data/udhr_txt/udhr_ame.txt +222 -0
- data/lib/data/udhr_txt/udhr_amh.txt +209 -0
- data/lib/data/udhr_txt/udhr_amr.txt +221 -0
- data/lib/data/udhr_txt/udhr_arb.txt +220 -0
- data/lib/data/udhr_txt/udhr_arl.txt +222 -0
- data/lib/data/udhr_txt/udhr_arn.txt +218 -0
- data/lib/data/udhr_txt/udhr_ast.txt +221 -0
- data/lib/data/udhr_txt/udhr_auv.txt +217 -0
- data/lib/data/udhr_txt/udhr_ayr.txt +218 -0
- data/lib/data/udhr_txt/udhr_azj_cyrl.txt +218 -0
- data/lib/data/udhr_txt/udhr_azj_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_bam.txt +218 -0
- data/lib/data/udhr_txt/udhr_ban.txt +222 -0
- data/lib/data/udhr_txt/udhr_bba.txt +218 -0
- data/lib/data/udhr_txt/udhr_bci.txt +217 -0
- data/lib/data/udhr_txt/udhr_bcl.txt +219 -0
- data/lib/data/udhr_txt/udhr_bel.txt +221 -0
- data/lib/data/udhr_txt/udhr_bem.txt +217 -0
- data/lib/data/udhr_txt/udhr_ben.txt +222 -0
- data/lib/data/udhr_txt/udhr_bho.txt +219 -0
- data/lib/data/udhr_txt/udhr_bin.txt +232 -0
- data/lib/data/udhr_txt/udhr_bis.txt +218 -0
- data/lib/data/udhr_txt/udhr_blu.txt +219 -0
- data/lib/data/udhr_txt/udhr_boa.txt +223 -0
- data/lib/data/udhr_txt/udhr_bod.txt +221 -0
- data/lib/data/udhr_txt/udhr_bos_cyrl.txt +220 -0
- data/lib/data/udhr_txt/udhr_bos_latn.txt +220 -0
- data/lib/data/udhr_txt/udhr_bre.txt +222 -0
- data/lib/data/udhr_txt/udhr_btb.txt +217 -0
- data/lib/data/udhr_txt/udhr_bug.txt +222 -0
- data/lib/data/udhr_txt/udhr_bul.txt +218 -0
- data/lib/data/udhr_txt/udhr_cab.txt +221 -0
- data/lib/data/udhr_txt/udhr_cak.txt +217 -0
- data/lib/data/udhr_txt/udhr_cat.txt +220 -0
- data/lib/data/udhr_txt/udhr_cbr.txt +219 -0
- data/lib/data/udhr_txt/udhr_cbs.txt +153 -0
- data/lib/data/udhr_txt/udhr_cbt.txt +220 -0
- data/lib/data/udhr_txt/udhr_cbu.txt +218 -0
- data/lib/data/udhr_txt/udhr_ccx.txt +222 -0
- data/lib/data/udhr_txt/udhr_ceb.txt +218 -0
- data/lib/data/udhr_txt/udhr_ces.txt +221 -0
- data/lib/data/udhr_txt/udhr_cha.txt +219 -0
- data/lib/data/udhr_txt/udhr_chj.txt +220 -0
- data/lib/data/udhr_txt/udhr_chk.txt +220 -0
- data/lib/data/udhr_txt/udhr_chr.txt +10 -0
- data/lib/data/udhr_txt/udhr_cic.txt +220 -0
- data/lib/data/udhr_txt/udhr_cjk.txt +218 -0
- data/lib/data/udhr_txt/udhr_cjk_AO.txt +220 -0
- data/lib/data/udhr_txt/udhr_ckb.txt +217 -0
- data/lib/data/udhr_txt/udhr_cmn_hans.txt +220 -0
- data/lib/data/udhr_txt/udhr_cmn_hant.txt +220 -0
- data/lib/data/udhr_txt/udhr_cnh.txt +220 -0
- data/lib/data/udhr_txt/udhr_cni.txt +220 -0
- data/lib/data/udhr_txt/udhr_cos.txt +218 -0
- data/lib/data/udhr_txt/udhr_cot.txt +222 -0
- data/lib/data/udhr_txt/udhr_cpu.txt +219 -0
- data/lib/data/udhr_txt/udhr_crs.txt +217 -0
- data/lib/data/udhr_txt/udhr_csa.txt +223 -0
- data/lib/data/udhr_txt/udhr_csw.txt +163 -0
- data/lib/data/udhr_txt/udhr_ctd.txt +222 -0
- data/lib/data/udhr_txt/udhr_cym.txt +222 -0
- data/lib/data/udhr_txt/udhr_dag.txt +217 -0
- data/lib/data/udhr_txt/udhr_dan.txt +224 -0
- data/lib/data/udhr_txt/udhr_ddn.txt +217 -0
- data/lib/data/udhr_txt/udhr_deu_1901.txt +220 -0
- data/lib/data/udhr_txt/udhr_deu_1996.txt +220 -0
- data/lib/data/udhr_txt/udhr_dga.txt +220 -0
- data/lib/data/udhr_txt/udhr_dip.txt +217 -0
- data/lib/data/udhr_txt/udhr_div.txt +220 -0
- data/lib/data/udhr_txt/udhr_dyo.txt +217 -0
- data/lib/data/udhr_txt/udhr_dzo.txt +9 -0
- data/lib/data/udhr_txt/udhr_ell_monotonic.txt +220 -0
- data/lib/data/udhr_txt/udhr_ell_polytonic.txt +220 -0
- data/lib/data/udhr_txt/udhr_emk.txt +218 -0
- data/lib/data/udhr_txt/udhr_eml.txt +219 -0
- data/lib/data/udhr_txt/udhr_eng.txt +219 -0
- data/lib/data/udhr_txt/udhr_epo.txt +221 -0
- data/lib/data/udhr_txt/udhr_est.txt +219 -0
- data/lib/data/udhr_txt/udhr_eus.txt +220 -0
- data/lib/data/udhr_txt/udhr_eve.txt +207 -0
- data/lib/data/udhr_txt/udhr_ewe.txt +218 -0
- data/lib/data/udhr_txt/udhr_fao.txt +219 -0
- data/lib/data/udhr_txt/udhr_fij.txt +224 -0
- data/lib/data/udhr_txt/udhr_fin.txt +224 -0
- data/lib/data/udhr_txt/udhr_flm.txt +219 -0
- data/lib/data/udhr_txt/udhr_fon.txt +217 -0
- data/lib/data/udhr_txt/udhr_fra.txt +218 -0
- data/lib/data/udhr_txt/udhr_fri.txt +219 -0
- data/lib/data/udhr_txt/udhr_fuc.txt +217 -0
- data/lib/data/udhr_txt/udhr_fur.txt +220 -0
- data/lib/data/udhr_txt/udhr_gaa.txt +220 -0
- data/lib/data/udhr_txt/udhr_gag.txt +223 -0
- data/lib/data/udhr_txt/udhr_gax.txt +222 -0
- data/lib/data/udhr_txt/udhr_gjn.txt +220 -0
- data/lib/data/udhr_txt/udhr_gkp.txt +216 -0
- data/lib/data/udhr_txt/udhr_gla.txt +229 -0
- data/lib/data/udhr_txt/udhr_gle.txt +215 -0
- data/lib/data/udhr_txt/udhr_glg.txt +217 -0
- data/lib/data/udhr_txt/udhr_guc.txt +221 -0
- data/lib/data/udhr_txt/udhr_gug.txt +210 -0
- data/lib/data/udhr_txt/udhr_guj.txt +219 -0
- data/lib/data/udhr_txt/udhr_gyr.txt +203 -0
- data/lib/data/udhr_txt/udhr_hat_kreyol.txt +221 -0
- data/lib/data/udhr_txt/udhr_hat_popular.txt +221 -0
- data/lib/data/udhr_txt/udhr_hau_NE.txt +219 -0
- data/lib/data/udhr_txt/udhr_hau_NG.txt +219 -0
- data/lib/data/udhr_txt/udhr_haw.txt +219 -0
- data/lib/data/udhr_txt/udhr_hea.txt +219 -0
- data/lib/data/udhr_txt/udhr_heb.txt +216 -0
- data/lib/data/udhr_txt/udhr_hil.txt +217 -0
- data/lib/data/udhr_txt/udhr_hin.txt +222 -0
- data/lib/data/udhr_txt/udhr_hms.txt +219 -0
- data/lib/data/udhr_txt/udhr_hna.txt +217 -0
- data/lib/data/udhr_txt/udhr_hni.txt +218 -0
- data/lib/data/udhr_txt/udhr_hrv.txt +218 -0
- data/lib/data/udhr_txt/udhr_hsb.txt +220 -0
- data/lib/data/udhr_txt/udhr_hun.txt +218 -0
- data/lib/data/udhr_txt/udhr_hus.txt +222 -0
- data/lib/data/udhr_txt/udhr_huu.txt +220 -0
- data/lib/data/udhr_txt/udhr_hva.txt +220 -0
- data/lib/data/udhr_txt/udhr_hye.txt +234 -0
- data/lib/data/udhr_txt/udhr_ibb.txt +235 -0
- data/lib/data/udhr_txt/udhr_ibo.txt +219 -0
- data/lib/data/udhr_txt/udhr_ido.txt +224 -0
- data/lib/data/udhr_txt/udhr_iii.txt +9 -0
- data/lib/data/udhr_txt/udhr_ike.txt +163 -0
- data/lib/data/udhr_txt/udhr_ilo.txt +217 -0
- data/lib/data/udhr_txt/udhr_ina.txt +220 -0
- data/lib/data/udhr_txt/udhr_ind.txt +219 -0
- data/lib/data/udhr_txt/udhr_isl.txt +217 -0
- data/lib/data/udhr_txt/udhr_ita.txt +221 -0
- data/lib/data/udhr_txt/udhr_jav.txt +222 -0
- data/lib/data/udhr_txt/udhr_jpn.txt +219 -0
- data/lib/data/udhr_txt/udhr_kal.txt +218 -0
- data/lib/data/udhr_txt/udhr_kan.txt +216 -0
- data/lib/data/udhr_txt/udhr_kat.txt +221 -0
- data/lib/data/udhr_txt/udhr_kaz.txt +218 -0
- data/lib/data/udhr_txt/udhr_kbp.txt +218 -0
- data/lib/data/udhr_txt/udhr_kde.txt +212 -0
- data/lib/data/udhr_txt/udhr_kea.txt +219 -0
- data/lib/data/udhr_txt/udhr_kek.txt +219 -0
- data/lib/data/udhr_txt/udhr_khk.txt +217 -0
- data/lib/data/udhr_txt/udhr_khk_mong.txt +11 -0
- data/lib/data/udhr_txt/udhr_khm.txt +220 -0
- data/lib/data/udhr_txt/udhr_kin.txt +220 -0
- data/lib/data/udhr_txt/udhr_kir.txt +220 -0
- data/lib/data/udhr_txt/udhr_kmb.txt +219 -0
- data/lib/data/udhr_txt/udhr_knc.txt +230 -0
- data/lib/data/udhr_txt/udhr_kng.txt +219 -0
- data/lib/data/udhr_txt/udhr_kng_AO.txt +219 -0
- data/lib/data/udhr_txt/udhr_koo.txt +216 -0
- data/lib/data/udhr_txt/udhr_kor.txt +219 -0
- data/lib/data/udhr_txt/udhr_kqn.txt +218 -0
- data/lib/data/udhr_txt/udhr_kri.txt +226 -0
- data/lib/data/udhr_txt/udhr_ktu.txt +219 -0
- data/lib/data/udhr_txt/udhr_lao.txt +223 -0
- data/lib/data/udhr_txt/udhr_lat.txt +221 -0
- data/lib/data/udhr_txt/udhr_lat_1.txt +220 -0
- data/lib/data/udhr_txt/udhr_lav.txt +220 -0
- data/lib/data/udhr_txt/udhr_lia.txt +218 -0
- data/lib/data/udhr_txt/udhr_lin.txt +217 -0
- data/lib/data/udhr_txt/udhr_lin_tones.txt +214 -0
- data/lib/data/udhr_txt/udhr_lit.txt +218 -0
- data/lib/data/udhr_txt/udhr_lnc.txt +219 -0
- data/lib/data/udhr_txt/udhr_lns.txt +219 -0
- data/lib/data/udhr_txt/udhr_loz.txt +219 -0
- data/lib/data/udhr_txt/udhr_ltz.txt +218 -0
- data/lib/data/udhr_txt/udhr_lua.txt +219 -0
- data/lib/data/udhr_txt/udhr_lue.txt +217 -0
- data/lib/data/udhr_txt/udhr_lug.txt +216 -0
- data/lib/data/udhr_txt/udhr_lun.txt +216 -0
- data/lib/data/udhr_txt/udhr_mad.txt +223 -0
- data/lib/data/udhr_txt/udhr_mag.txt +220 -0
- data/lib/data/udhr_txt/udhr_mah.txt +220 -0
- data/lib/data/udhr_txt/udhr_mai.txt +223 -0
- data/lib/data/udhr_txt/udhr_mal.txt +210 -0
- data/lib/data/udhr_txt/udhr_mam.txt +218 -0
- data/lib/data/udhr_txt/udhr_mar.txt +219 -0
- data/lib/data/udhr_txt/udhr_maz.txt +218 -0
- data/lib/data/udhr_txt/udhr_mcd.txt +220 -0
- data/lib/data/udhr_txt/udhr_mcf.txt +223 -0
- data/lib/data/udhr_txt/udhr_men.txt +222 -0
- data/lib/data/udhr_txt/udhr_mic.txt +218 -0
- data/lib/data/udhr_txt/udhr_min.txt +221 -0
- data/lib/data/udhr_txt/udhr_miq.txt +213 -0
- data/lib/data/udhr_txt/udhr_mkd.txt +221 -0
- data/lib/data/udhr_txt/udhr_mlt.txt +217 -0
- data/lib/data/udhr_txt/udhr_mly_arab.txt +219 -0
- data/lib/data/udhr_txt/udhr_mly_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_mos.txt +216 -0
- data/lib/data/udhr_txt/udhr_mri.txt +219 -0
- data/lib/data/udhr_txt/udhr_mxi.txt +218 -0
- data/lib/data/udhr_txt/udhr_mxv.txt +223 -0
- data/lib/data/udhr_txt/udhr_mya.txt +219 -0
- data/lib/data/udhr_txt/udhr_mzi.txt +227 -0
- data/lib/data/udhr_txt/udhr_nav.txt +219 -0
- data/lib/data/udhr_txt/udhr_nba.txt +257 -0
- data/lib/data/udhr_txt/udhr_nbl.txt +218 -0
- data/lib/data/udhr_txt/udhr_ndo.txt +217 -0
- data/lib/data/udhr_txt/udhr_nep.txt +214 -0
- data/lib/data/udhr_txt/udhr_nhn.txt +221 -0
- data/lib/data/udhr_txt/udhr_nld.txt +217 -0
- data/lib/data/udhr_txt/udhr_nno.txt +219 -0
- data/lib/data/udhr_txt/udhr_nob.txt +225 -0
- data/lib/data/udhr_txt/udhr_not.txt +218 -0
- data/lib/data/udhr_txt/udhr_nso.txt +219 -0
- data/lib/data/udhr_txt/udhr_nya_chechewa.txt +221 -0
- data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +218 -0
- data/lib/data/udhr_txt/udhr_nym.txt +229 -0
- data/lib/data/udhr_txt/udhr_nyn.txt +213 -0
- data/lib/data/udhr_txt/udhr_nzi.txt +221 -0
- data/lib/data/udhr_txt/udhr_ojb.txt +221 -0
- data/lib/data/udhr_txt/udhr_oss.txt +214 -0
- data/lib/data/udhr_txt/udhr_ote.txt +218 -0
- data/lib/data/udhr_txt/udhr_pam.txt +225 -0
- data/lib/data/udhr_txt/udhr_pan.txt +227 -0
- data/lib/data/udhr_txt/udhr_pau.txt +219 -0
- data/lib/data/udhr_txt/udhr_pbb.txt +218 -0
- data/lib/data/udhr_txt/udhr_pbu.txt +9 -0
- data/lib/data/udhr_txt/udhr_pcd.txt +218 -0
- data/lib/data/udhr_txt/udhr_pcm.txt +218 -0
- data/lib/data/udhr_txt/udhr_pes_1.txt +218 -0
- data/lib/data/udhr_txt/udhr_pes_2.txt +222 -0
- data/lib/data/udhr_txt/udhr_pis.txt +219 -0
- data/lib/data/udhr_txt/udhr_plt.txt +214 -0
- data/lib/data/udhr_txt/udhr_pnb.txt +223 -0
- data/lib/data/udhr_txt/udhr_pol.txt +220 -0
- data/lib/data/udhr_txt/udhr_pon.txt +218 -0
- data/lib/data/udhr_txt/udhr_por_BR.txt +231 -0
- data/lib/data/udhr_txt/udhr_por_PT.txt +219 -0
- data/lib/data/udhr_txt/udhr_pov.txt +220 -0
- data/lib/data/udhr_txt/udhr_ppl.txt +219 -0
- data/lib/data/udhr_txt/udhr_prq.txt +151 -0
- data/lib/data/udhr_txt/udhr_prv.txt +207 -0
- data/lib/data/udhr_txt/udhr_quc.txt +217 -0
- data/lib/data/udhr_txt/udhr_qud.txt +218 -0
- data/lib/data/udhr_txt/udhr_quy.txt +221 -0
- data/lib/data/udhr_txt/udhr_quz.txt +223 -0
- data/lib/data/udhr_txt/udhr_qva.txt +219 -0
- data/lib/data/udhr_txt/udhr_qvc.txt +218 -0
- data/lib/data/udhr_txt/udhr_qvh.txt +217 -0
- data/lib/data/udhr_txt/udhr_qvm.txt +219 -0
- data/lib/data/udhr_txt/udhr_qvn.txt +217 -0
- data/lib/data/udhr_txt/udhr_qwh.txt +218 -0
- data/lib/data/udhr_txt/udhr_qxa.txt +217 -0
- data/lib/data/udhr_txt/udhr_qxn.txt +216 -0
- data/lib/data/udhr_txt/udhr_qxu.txt +221 -0
- data/lib/data/udhr_txt/udhr_rar.txt +220 -0
- data/lib/data/udhr_txt/udhr_rmn.txt +220 -0
- data/lib/data/udhr_txt/udhr_rmn_1.txt +221 -0
- data/lib/data/udhr_txt/udhr_rmy.txt +218 -0
- data/lib/data/udhr_txt/udhr_roh.txt +217 -0
- data/lib/data/udhr_txt/udhr_ron_1953.txt +218 -0
- data/lib/data/udhr_txt/udhr_ron_1993.txt +218 -0
- data/lib/data/udhr_txt/udhr_ron_2006.txt +218 -0
- data/lib/data/udhr_txt/udhr_run.txt +218 -0
- data/lib/data/udhr_txt/udhr_rus.txt +220 -0
- data/lib/data/udhr_txt/udhr_sag.txt +220 -0
- data/lib/data/udhr_txt/udhr_san.txt +219 -0
- data/lib/data/udhr_txt/udhr_sco.txt +222 -0
- data/lib/data/udhr_txt/udhr_shp.txt +224 -0
- data/lib/data/udhr_txt/udhr_skr.txt +225 -0
- data/lib/data/udhr_txt/udhr_slk.txt +219 -0
- data/lib/data/udhr_txt/udhr_slv.txt +218 -0
- data/lib/data/udhr_txt/udhr_sme.txt +220 -0
- data/lib/data/udhr_txt/udhr_smo.txt +226 -0
- data/lib/data/udhr_txt/udhr_sna.txt +223 -0
- data/lib/data/udhr_txt/udhr_snk.txt +220 -0
- data/lib/data/udhr_txt/udhr_som.txt +216 -0
- data/lib/data/udhr_txt/udhr_sot.txt +220 -0
- data/lib/data/udhr_txt/udhr_spa.txt +220 -0
- data/lib/data/udhr_txt/udhr_src.txt +220 -0
- data/lib/data/udhr_txt/udhr_srp_cyrl.txt +218 -0
- data/lib/data/udhr_txt/udhr_srp_latn.txt +218 -0
- data/lib/data/udhr_txt/udhr_srr.txt +219 -0
- data/lib/data/udhr_txt/udhr_ssw.txt +228 -0
- data/lib/data/udhr_txt/udhr_suk.txt +218 -0
- data/lib/data/udhr_txt/udhr_sun.txt +227 -0
- data/lib/data/udhr_txt/udhr_sus.txt +218 -0
- data/lib/data/udhr_txt/udhr_swe.txt +224 -0
- data/lib/data/udhr_txt/udhr_swh.txt +221 -0
- data/lib/data/udhr_txt/udhr_tah.txt +217 -0
- data/lib/data/udhr_txt/udhr_taj.txt +10 -0
- data/lib/data/udhr_txt/udhr_tam.txt +227 -0
- data/lib/data/udhr_txt/udhr_tat.txt +219 -0
- data/lib/data/udhr_txt/udhr_tbz.txt +219 -0
- data/lib/data/udhr_txt/udhr_tca.txt +219 -0
- data/lib/data/udhr_txt/udhr_tem.txt +216 -0
- data/lib/data/udhr_txt/udhr_tet.txt +219 -0
- data/lib/data/udhr_txt/udhr_tgk.txt +217 -0
- data/lib/data/udhr_txt/udhr_tgl.txt +224 -0
- data/lib/data/udhr_txt/udhr_tgl_tglg.txt +9 -0
- data/lib/data/udhr_txt/udhr_tha.txt +217 -0
- data/lib/data/udhr_txt/udhr_tir.txt +217 -0
- data/lib/data/udhr_txt/udhr_tiv.txt +232 -0
- data/lib/data/udhr_txt/udhr_tob.txt +218 -0
- data/lib/data/udhr_txt/udhr_toi.txt +216 -0
- data/lib/data/udhr_txt/udhr_toj.txt +219 -0
- data/lib/data/udhr_txt/udhr_ton.txt +221 -0
- data/lib/data/udhr_txt/udhr_top.txt +220 -0
- data/lib/data/udhr_txt/udhr_tpi.txt +219 -0
- data/lib/data/udhr_txt/udhr_tsn.txt +219 -0
- data/lib/data/udhr_txt/udhr_tso_MZ.txt +220 -0
- data/lib/data/udhr_txt/udhr_tsz.txt +218 -0
- data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +216 -0
- data/lib/data/udhr_txt/udhr_tuk_latn.txt +221 -0
- data/lib/data/udhr_txt/udhr_tur.txt +219 -0
- data/lib/data/udhr_txt/udhr_tzc.txt +219 -0
- data/lib/data/udhr_txt/udhr_tzh.txt +218 -0
- data/lib/data/udhr_txt/udhr_tzm.txt +220 -0
- data/lib/data/udhr_txt/udhr_tzm_tfng.txt +9 -0
- data/lib/data/udhr_txt/udhr_uig_arab.txt +219 -0
- data/lib/data/udhr_txt/udhr_uig_latn.txt +219 -0
- data/lib/data/udhr_txt/udhr_ukr.txt +218 -0
- data/lib/data/udhr_txt/udhr_umb.txt +218 -0
- data/lib/data/udhr_txt/udhr_ura.txt +219 -0
- data/lib/data/udhr_txt/udhr_urd.txt +9 -0
- data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +220 -0
- data/lib/data/udhr_txt/udhr_uzn_latn.txt +220 -0
- data/lib/data/udhr_txt/udhr_vai.txt +224 -0
- data/lib/data/udhr_txt/udhr_vie.txt +221 -0
- data/lib/data/udhr_txt/udhr_vmw.txt +220 -0
- data/lib/data/udhr_txt/udhr_war.txt +219 -0
- data/lib/data/udhr_txt/udhr_wln.txt +220 -0
- data/lib/data/udhr_txt/udhr_wol.txt +219 -0
- data/lib/data/udhr_txt/udhr_wwa.txt +109 -0
- data/lib/data/udhr_txt/udhr_xho.txt +219 -0
- data/lib/data/udhr_txt/udhr_xsm.txt +219 -0
- data/lib/data/udhr_txt/udhr_yad.txt +220 -0
- data/lib/data/udhr_txt/udhr_yao.txt +214 -0
- data/lib/data/udhr_txt/udhr_yap.txt +220 -0
- data/lib/data/udhr_txt/udhr_ydd.txt +223 -0
- data/lib/data/udhr_txt/udhr_ykg.txt +211 -0
- data/lib/data/udhr_txt/udhr_yor.txt +218 -0
- data/lib/data/udhr_txt/udhr_yua.txt +218 -0
- data/lib/data/udhr_txt/udhr_zam.txt +223 -0
- data/lib/data/udhr_txt/udhr_ztu.txt +219 -0
- data/lib/data/udhr_txt/udhr_zul.txt +219 -0
- data/lib/profiles/profile_deu_1996.yml +25362 -0
- data/lib/profiles/profile_eng.yml +20794 -0
- data/lib/profiles/profile_fra.yml +24964 -0
- data/lib/profiles/profile_spa.yml +23020 -0
- data/test/babel_test.rb +44 -0
- data/test/profile_test.rb +105 -0
- data/test/string_extensions_test.rb +43 -0
- data/test/test_helper.rb +10 -0
- data/test/train.rb +26 -0
- metadata +440 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Simplificator GmbH
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# babel
|
2
|
+
|
3
|
+
Babel is a gem to identify in what language a text is written.
|
4
|
+
It is based on the n-gram approach by Cavnar and Trenkle, as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
|
5
|
+
|
6
|
+
|
7
|
+
## Usage
|
8
|
+
require 'rubygems'
|
9
|
+
require 'simplificator-babel'
|
10
|
+
|
11
|
+
# Train babel: feed it some texts
|
12
|
+
'An english text to train and learn'.language= 'en'
|
13
|
+
'Ein deutscher Text'.language= 'de'
|
14
|
+
|
15
|
+
puts 'Noch ein deutscher Text'.language
|
16
|
+
|
17
|
+
## Copyright
|
18
|
+
|
19
|
+
Copyright (c) 2009 Simplificator GmbH. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
# Build script: gem packaging (jeweler), tests, coverage (rcov) and rdoc.
require 'rubygems'
require 'rake'
# YAML is used below to read VERSION.yml; require it explicitly instead of
# relying on another library having loaded it already.
require 'yaml'

# Gem packaging tasks. Jeweler is optional: without it only a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "babel"
    gem.summary = %Q{Utility to guess the language of a text}
    gem.email = "info@simplificator.com"
    gem.homepage = "http://github.com/simplificator/babel"
    gem.authors = ["simplificator"]
    gem.add_dependency('ya2yaml', '>= 0.2.6')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# `rake test` runs every test/**/*_test.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task. When rcov is missing, `rake rcov` aborts with an install hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# API documentation; the title carries the version from VERSION.yml if present.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "babel #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
56
|
+
|
data/VERSION.yml
ADDED
data/lib/babel/babel.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
# Babel guesses the language of a text by comparing a profile built from the
# text with the profiles that were learned (or loaded) for each language.
module Babel
  # Known profiles, keyed by language name (always a String, see learn).
  @profiles = {}
  # Default directory used by load_profiles / save_profiles.
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')

  # Add +text+ to the profile of +lang+, creating the profile on first use.
  #
  # lang    - language identifier; converted with to_s before it is used as key.
  # text    - the training text.
  # options - handed unchanged to Profile#learn.
  #
  # Returns the result of Profile#learn.
  def self.learn(lang, text, options = {})
    lang = lang.to_s
    profile = @profiles[lang] ||= Profile.new
    profile.learn(text, options)
  end


  # Forget every learned or loaded profile.
  def self.clear_profiles
    @profiles = {}
  end

  # Guess the language of +source+.
  #
  # source  - the text to classify.
  # options - accepted for interface compatibility; currently not used.
  #
  # Returns the language with the smallest distance (the first one wins on a
  # tie), or nil when no profiles are known.
  def self.guess(source, options = {})
    closest = Babel.distances(source).inject(nil) do |best, entry|
      best.nil? || entry.last < best.last ? entry : best
    end
    closest.first if closest
  end

  # Distance between +text+ and every known profile.
  #
  # Returns an array of [language, distance] pairs.
  def self.distances(text)
    source = Profile.new.learn(text)
    @profiles.map { |lang, target| [lang, source.distance(target)] }
  end

  # NOTE: the helpers below used to follow a bare `private`, which has no
  # effect on methods defined with `def self.`. They are (and always were)
  # publicly callable, so the misleading keyword has been dropped.

  # Path of the profile file for +lang+ inside +dir+.
  def self.file_name(dir, lang)
    File.join(dir, "profile_#{lang}.yml")
  end

  # Load every profile_<lang>.yml found in a directory into the known profiles.
  #
  # options - :directory  directory to read from (defaults to PROFILE_DIR).
  #
  # Files that do not follow the profile_<lang>.yml naming are skipped.
  # NOTE(review): YAML.load_file is kept as-is; only point :directory at
  # trusted files, and confirm the loader accepts the serialized profile
  # format on the Ruby version in use.
  def self.load_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    Dir[File.join(dir, 'profile_*.yml')].each do |file|
      # Match on the file name only so directory names cannot interfere.
      next unless File.basename(file) =~ /\Aprofile_(.+)\.yml\z/
      @profiles[$1] = YAML.load_file(file)
    end
  end

  # Write every known profile to profile_<lang>.yml.
  #
  # options - :directory  target directory (defaults to PROFILE_DIR).
  #           :limit      when the key is present, each profile is cut down
  #                       with Profile#limit before it is written.
  def self.save_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    @profiles.each do |lang, profile|
      profile.limit(options[:limit]) if options.has_key?(:limit)
      File.open(file_name(dir, lang), 'wb') do |file|
        file.write(profile.ya2yaml)
      end
    end
  end
end
|
54
|
+
|
@@ -0,0 +1,94 @@
|
|
1
|
+
module Babel
  # An n-gram profile of one or more texts.
  #
  # Internally every n-gram maps to a two element array:
  #   [occurrences, relative frequency]
  # The second element is 0 until rank has been called.
  class Profile
    def initialize
      @profile = {}
      @total_occurences = 0
    end

    # Split +text+ into words, count the n-grams of every word and re-rank.
    #
    # text    - the text to learn from.
    # options - n-gram options, merged over the defaults
    #           :min_length => 2, :max_length => 5, :pad => true
    #           and passed to String#ngrams.
    #
    # Returns self so learn calls can be chained:
    #   profile.learn('asasas').learn('asdsad')
    def learn(text, options = {})
      options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
      clean(text).split(' ').each do |word|
        word.ngrams(options).each { |ngram| occured(ngram) }
      end
      rank
      self
    end


    # Hook for normalising text before it is learned.
    #
    # Currently returns +text+ untouched.
    # TODO: needed? An earlier draft stripped the characters ? . ; : ( ) / +
    # and all digits here, but that code was never reachable.
    def clean(text)
      text
    end

    # Limit this profile to the n-grams whose second stored value does not
    # exceed +boundary+. The profile needs to be ranked first.
    # Do not use this if you plan to extend the profile later on.
    #
    # NOTE(review): rank stores relative frequencies (0..1), so the default
    # boundary of 100 removes nothing — confirm the intended unit.
    #
    # Raises RuntimeError when an n-gram has not been ranked yet.
    def limit(boundary = 100)
      @profile.reject! do |key, value|
        raise 'Please call rank() first' if value.last == 0
        boundary < value.last
      end
    end

    # Rank the current profile: store for every n-gram its share of all
    # counted occurrences (occurrences / total occurrences).
    #
    # The previous implementation first sorted the entries and assigned
    # ordinal ranks, only to overwrite them right away with these
    # frequencies; that redundant pass has been removed.
    def rank
      total = @total_occurences.to_f
      @profile.values.each do |value|
        value[1] = value[0] / total
      end
    end

    # Called when an ngram occurred; optionally pass the
    # amount (how many times the ngram occurred).
    def occured(ngram, amount = 1)
      (@profile[ngram] ||= [0, 0])[0] += amount
      @total_occurences += amount
    end

    # Find the occurrence count of an ngram. If it never occurred, returns 0.
    def occurence(ngram)
      entry = @profile[ngram]
      entry ? entry.first : 0
    end

    # Find the value stored by rank for an ngram (its relative frequency).
    # Returns 0 if the ngram is unknown or not yet ranked.
    def ranking(ngram)
      entry = @profile[ngram]
      entry ? entry.last : 0
    end

    # Calculate the distance to another profile.
    #
    # For every n-gram of this profile: add 1 when +other+ reports 0 for it,
    # otherwise add the absolute difference of the two stored values.
    # Returns 0 for an empty profile.
    def distance(other)
      @profile.inject(0) do |sum, (ngram, stats)|
        other_ranking = other.ranking(ngram)
        sum + (other_ranking == 0 ? 1 : (other_ranking - stats.last).abs)
      end
    end


    def to_s
      @profile.inspect
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Babel's extensions to String: n-gram extraction plus shortcuts for
# learning and guessing languages.
class String

  # All substrings (n-grams) of this string, ordered by start position and,
  # for the same start position, by increasing length.
  #
  # options - :min_length     shortest n-gram to return (default 1).
  #           :max_length     longest n-gram to return (default: length of
  #                           this string, before padding).
  #           :pad            when true, surround the string with '_':
  #                           one in front and (length - 1) behind.
  #           :preserve_case  when true, do not downcase first.
  #
  # Returns an array of strings; an empty string yields [] (also with :pad,
  # which used to raise ArgumentError for empty strings).
  def ngrams(options = {})
    value = options[:preserve_case] ? self : self.downcase
    return [] if value.empty?

    # Substrings are never shorter than one character.
    shortest = [options[:min_length] || 1, 1].max
    longest = options[:max_length] || self.length
    value = "_#{value}#{'_' * (value.length - 1)}" if options[:pad]

    res = []
    0.upto(value.length - 1) do |start|
      # Only walk the permitted lengths that still fit behind +start+.
      remaining = value.length - start
      shortest.upto([longest, remaining].min) do |len|
        res << value[start, len]
      end
    end
    res
  end

  # TODO: byte_grams(options = {}) was sketched here with the same
  # :min_length / :max_length / :preserve_case options but never implemented.


  # Ask Babel about the language of this text
  # Can return nil if no language found
  def language(options = {})
    Babel.guess(self, options)
  end

  # Tell Babel that this text is in a given language.
  # With assignment syntax ('text'.language = 'en') only +lang+ can be given;
  # +options+ can only be supplied through an explicit send.
  def language=(lang, options = {})
    Babel.learn(lang, self, options)
  end
end
|
data/lib/babel.rb
ADDED