simplificator-babel 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +6 -0
- data/README.markdown +50 -7
- data/Rakefile +46 -0
- data/VERSION.yml +2 -2
- data/babel.gemspec +64 -0
- data/lib/babel/babel.rb +84 -23
- data/lib/babel/profile.rb +43 -25
- data/lib/babel/string_extensions.rb +22 -17
- data/lib/profiles/{profile_deu_1996.yml → profile_deu.yml} +2 -1
- data/lib/profiles/profile_eng.yml +2 -1
- data/lib/profiles/profile_fra.yml +2 -1
- data/lib/profiles/profile_ita.yml +22432 -0
- data/lib/profiles/profile_spa.yml +2 -1
- data/lib/profiles/udhr_txt.zip +0 -0
- data/samples/guessing.rb +28 -0
- data/test/babel_test.rb +3 -34
- data/test/string_extensions_test.rb +10 -8
- metadata +8 -361
- data/lib/data/udhr_txt/index.xml +0 -385
- data/lib/data/udhr_txt/udhr_007.txt +0 -220
- data/lib/data/udhr_txt/udhr_008.txt +0 -220
- data/lib/data/udhr_txt/udhr_009.txt +0 -228
- data/lib/data/udhr_txt/udhr_010.txt +0 -219
- data/lib/data/udhr_txt/udhr_011.txt +0 -232
- data/lib/data/udhr_txt/udhr_abk.txt +0 -218
- data/lib/data/udhr_txt/udhr_ace.txt +0 -221
- data/lib/data/udhr_txt/udhr_acu.txt +0 -222
- data/lib/data/udhr_txt/udhr_ada.txt +0 -220
- data/lib/data/udhr_txt/udhr_afr.txt +0 -219
- data/lib/data/udhr_txt/udhr_agr.txt +0 -219
- data/lib/data/udhr_txt/udhr_aii.txt +0 -216
- data/lib/data/udhr_txt/udhr_ajg.txt +0 -219
- data/lib/data/udhr_txt/udhr_aka_akuapem.txt +0 -221
- data/lib/data/udhr_txt/udhr_aka_asante.txt +0 -220
- data/lib/data/udhr_txt/udhr_aka_fante.txt +0 -219
- data/lib/data/udhr_txt/udhr_als.txt +0 -220
- data/lib/data/udhr_txt/udhr_amc.txt +0 -215
- data/lib/data/udhr_txt/udhr_ame.txt +0 -222
- data/lib/data/udhr_txt/udhr_amh.txt +0 -209
- data/lib/data/udhr_txt/udhr_amr.txt +0 -221
- data/lib/data/udhr_txt/udhr_arb.txt +0 -220
- data/lib/data/udhr_txt/udhr_arl.txt +0 -222
- data/lib/data/udhr_txt/udhr_arn.txt +0 -218
- data/lib/data/udhr_txt/udhr_ast.txt +0 -221
- data/lib/data/udhr_txt/udhr_auv.txt +0 -217
- data/lib/data/udhr_txt/udhr_ayr.txt +0 -218
- data/lib/data/udhr_txt/udhr_azj_cyrl.txt +0 -218
- data/lib/data/udhr_txt/udhr_azj_latn.txt +0 -218
- data/lib/data/udhr_txt/udhr_bam.txt +0 -218
- data/lib/data/udhr_txt/udhr_ban.txt +0 -222
- data/lib/data/udhr_txt/udhr_bba.txt +0 -218
- data/lib/data/udhr_txt/udhr_bci.txt +0 -217
- data/lib/data/udhr_txt/udhr_bcl.txt +0 -219
- data/lib/data/udhr_txt/udhr_bel.txt +0 -221
- data/lib/data/udhr_txt/udhr_bem.txt +0 -217
- data/lib/data/udhr_txt/udhr_ben.txt +0 -222
- data/lib/data/udhr_txt/udhr_bho.txt +0 -219
- data/lib/data/udhr_txt/udhr_bin.txt +0 -232
- data/lib/data/udhr_txt/udhr_bis.txt +0 -218
- data/lib/data/udhr_txt/udhr_blu.txt +0 -219
- data/lib/data/udhr_txt/udhr_boa.txt +0 -223
- data/lib/data/udhr_txt/udhr_bod.txt +0 -221
- data/lib/data/udhr_txt/udhr_bos_cyrl.txt +0 -220
- data/lib/data/udhr_txt/udhr_bos_latn.txt +0 -220
- data/lib/data/udhr_txt/udhr_bre.txt +0 -222
- data/lib/data/udhr_txt/udhr_btb.txt +0 -217
- data/lib/data/udhr_txt/udhr_bug.txt +0 -222
- data/lib/data/udhr_txt/udhr_bul.txt +0 -218
- data/lib/data/udhr_txt/udhr_cab.txt +0 -221
- data/lib/data/udhr_txt/udhr_cak.txt +0 -217
- data/lib/data/udhr_txt/udhr_cat.txt +0 -220
- data/lib/data/udhr_txt/udhr_cbr.txt +0 -219
- data/lib/data/udhr_txt/udhr_cbs.txt +0 -153
- data/lib/data/udhr_txt/udhr_cbt.txt +0 -220
- data/lib/data/udhr_txt/udhr_cbu.txt +0 -218
- data/lib/data/udhr_txt/udhr_ccx.txt +0 -222
- data/lib/data/udhr_txt/udhr_ceb.txt +0 -218
- data/lib/data/udhr_txt/udhr_ces.txt +0 -221
- data/lib/data/udhr_txt/udhr_cha.txt +0 -219
- data/lib/data/udhr_txt/udhr_chj.txt +0 -220
- data/lib/data/udhr_txt/udhr_chk.txt +0 -220
- data/lib/data/udhr_txt/udhr_chr.txt +0 -10
- data/lib/data/udhr_txt/udhr_cic.txt +0 -220
- data/lib/data/udhr_txt/udhr_cjk.txt +0 -218
- data/lib/data/udhr_txt/udhr_cjk_AO.txt +0 -220
- data/lib/data/udhr_txt/udhr_ckb.txt +0 -217
- data/lib/data/udhr_txt/udhr_cmn_hans.txt +0 -220
- data/lib/data/udhr_txt/udhr_cmn_hant.txt +0 -220
- data/lib/data/udhr_txt/udhr_cnh.txt +0 -220
- data/lib/data/udhr_txt/udhr_cni.txt +0 -220
- data/lib/data/udhr_txt/udhr_cos.txt +0 -218
- data/lib/data/udhr_txt/udhr_cot.txt +0 -222
- data/lib/data/udhr_txt/udhr_cpu.txt +0 -219
- data/lib/data/udhr_txt/udhr_crs.txt +0 -217
- data/lib/data/udhr_txt/udhr_csa.txt +0 -223
- data/lib/data/udhr_txt/udhr_csw.txt +0 -163
- data/lib/data/udhr_txt/udhr_ctd.txt +0 -222
- data/lib/data/udhr_txt/udhr_cym.txt +0 -222
- data/lib/data/udhr_txt/udhr_dag.txt +0 -217
- data/lib/data/udhr_txt/udhr_dan.txt +0 -224
- data/lib/data/udhr_txt/udhr_ddn.txt +0 -217
- data/lib/data/udhr_txt/udhr_deu_1901.txt +0 -220
- data/lib/data/udhr_txt/udhr_deu_1996.txt +0 -220
- data/lib/data/udhr_txt/udhr_dga.txt +0 -220
- data/lib/data/udhr_txt/udhr_dip.txt +0 -217
- data/lib/data/udhr_txt/udhr_div.txt +0 -220
- data/lib/data/udhr_txt/udhr_dyo.txt +0 -217
- data/lib/data/udhr_txt/udhr_dzo.txt +0 -9
- data/lib/data/udhr_txt/udhr_ell_monotonic.txt +0 -220
- data/lib/data/udhr_txt/udhr_ell_polytonic.txt +0 -220
- data/lib/data/udhr_txt/udhr_emk.txt +0 -218
- data/lib/data/udhr_txt/udhr_eml.txt +0 -219
- data/lib/data/udhr_txt/udhr_eng.txt +0 -219
- data/lib/data/udhr_txt/udhr_epo.txt +0 -221
- data/lib/data/udhr_txt/udhr_est.txt +0 -219
- data/lib/data/udhr_txt/udhr_eus.txt +0 -220
- data/lib/data/udhr_txt/udhr_eve.txt +0 -207
- data/lib/data/udhr_txt/udhr_ewe.txt +0 -218
- data/lib/data/udhr_txt/udhr_fao.txt +0 -219
- data/lib/data/udhr_txt/udhr_fij.txt +0 -224
- data/lib/data/udhr_txt/udhr_fin.txt +0 -224
- data/lib/data/udhr_txt/udhr_flm.txt +0 -219
- data/lib/data/udhr_txt/udhr_fon.txt +0 -217
- data/lib/data/udhr_txt/udhr_fra.txt +0 -218
- data/lib/data/udhr_txt/udhr_fri.txt +0 -219
- data/lib/data/udhr_txt/udhr_fuc.txt +0 -217
- data/lib/data/udhr_txt/udhr_fur.txt +0 -220
- data/lib/data/udhr_txt/udhr_gaa.txt +0 -220
- data/lib/data/udhr_txt/udhr_gag.txt +0 -223
- data/lib/data/udhr_txt/udhr_gax.txt +0 -222
- data/lib/data/udhr_txt/udhr_gjn.txt +0 -220
- data/lib/data/udhr_txt/udhr_gkp.txt +0 -216
- data/lib/data/udhr_txt/udhr_gla.txt +0 -229
- data/lib/data/udhr_txt/udhr_gle.txt +0 -215
- data/lib/data/udhr_txt/udhr_glg.txt +0 -217
- data/lib/data/udhr_txt/udhr_guc.txt +0 -221
- data/lib/data/udhr_txt/udhr_gug.txt +0 -210
- data/lib/data/udhr_txt/udhr_guj.txt +0 -219
- data/lib/data/udhr_txt/udhr_gyr.txt +0 -203
- data/lib/data/udhr_txt/udhr_hat_kreyol.txt +0 -221
- data/lib/data/udhr_txt/udhr_hat_popular.txt +0 -221
- data/lib/data/udhr_txt/udhr_hau_NE.txt +0 -219
- data/lib/data/udhr_txt/udhr_hau_NG.txt +0 -219
- data/lib/data/udhr_txt/udhr_haw.txt +0 -219
- data/lib/data/udhr_txt/udhr_hea.txt +0 -219
- data/lib/data/udhr_txt/udhr_heb.txt +0 -216
- data/lib/data/udhr_txt/udhr_hil.txt +0 -217
- data/lib/data/udhr_txt/udhr_hin.txt +0 -222
- data/lib/data/udhr_txt/udhr_hms.txt +0 -219
- data/lib/data/udhr_txt/udhr_hna.txt +0 -217
- data/lib/data/udhr_txt/udhr_hni.txt +0 -218
- data/lib/data/udhr_txt/udhr_hrv.txt +0 -218
- data/lib/data/udhr_txt/udhr_hsb.txt +0 -220
- data/lib/data/udhr_txt/udhr_hun.txt +0 -218
- data/lib/data/udhr_txt/udhr_hus.txt +0 -222
- data/lib/data/udhr_txt/udhr_huu.txt +0 -220
- data/lib/data/udhr_txt/udhr_hva.txt +0 -220
- data/lib/data/udhr_txt/udhr_hye.txt +0 -234
- data/lib/data/udhr_txt/udhr_ibb.txt +0 -235
- data/lib/data/udhr_txt/udhr_ibo.txt +0 -219
- data/lib/data/udhr_txt/udhr_ido.txt +0 -224
- data/lib/data/udhr_txt/udhr_iii.txt +0 -9
- data/lib/data/udhr_txt/udhr_ike.txt +0 -163
- data/lib/data/udhr_txt/udhr_ilo.txt +0 -217
- data/lib/data/udhr_txt/udhr_ina.txt +0 -220
- data/lib/data/udhr_txt/udhr_ind.txt +0 -219
- data/lib/data/udhr_txt/udhr_isl.txt +0 -217
- data/lib/data/udhr_txt/udhr_ita.txt +0 -221
- data/lib/data/udhr_txt/udhr_jav.txt +0 -222
- data/lib/data/udhr_txt/udhr_jpn.txt +0 -219
- data/lib/data/udhr_txt/udhr_kal.txt +0 -218
- data/lib/data/udhr_txt/udhr_kan.txt +0 -216
- data/lib/data/udhr_txt/udhr_kat.txt +0 -221
- data/lib/data/udhr_txt/udhr_kaz.txt +0 -218
- data/lib/data/udhr_txt/udhr_kbp.txt +0 -218
- data/lib/data/udhr_txt/udhr_kde.txt +0 -212
- data/lib/data/udhr_txt/udhr_kea.txt +0 -219
- data/lib/data/udhr_txt/udhr_kek.txt +0 -219
- data/lib/data/udhr_txt/udhr_khk.txt +0 -217
- data/lib/data/udhr_txt/udhr_khk_mong.txt +0 -11
- data/lib/data/udhr_txt/udhr_khm.txt +0 -220
- data/lib/data/udhr_txt/udhr_kin.txt +0 -220
- data/lib/data/udhr_txt/udhr_kir.txt +0 -220
- data/lib/data/udhr_txt/udhr_kmb.txt +0 -219
- data/lib/data/udhr_txt/udhr_knc.txt +0 -230
- data/lib/data/udhr_txt/udhr_kng.txt +0 -219
- data/lib/data/udhr_txt/udhr_kng_AO.txt +0 -219
- data/lib/data/udhr_txt/udhr_koo.txt +0 -216
- data/lib/data/udhr_txt/udhr_kor.txt +0 -219
- data/lib/data/udhr_txt/udhr_kqn.txt +0 -218
- data/lib/data/udhr_txt/udhr_kri.txt +0 -226
- data/lib/data/udhr_txt/udhr_ktu.txt +0 -219
- data/lib/data/udhr_txt/udhr_lao.txt +0 -223
- data/lib/data/udhr_txt/udhr_lat.txt +0 -221
- data/lib/data/udhr_txt/udhr_lat_1.txt +0 -220
- data/lib/data/udhr_txt/udhr_lav.txt +0 -220
- data/lib/data/udhr_txt/udhr_lia.txt +0 -218
- data/lib/data/udhr_txt/udhr_lin.txt +0 -217
- data/lib/data/udhr_txt/udhr_lin_tones.txt +0 -214
- data/lib/data/udhr_txt/udhr_lit.txt +0 -218
- data/lib/data/udhr_txt/udhr_lnc.txt +0 -219
- data/lib/data/udhr_txt/udhr_lns.txt +0 -219
- data/lib/data/udhr_txt/udhr_loz.txt +0 -219
- data/lib/data/udhr_txt/udhr_ltz.txt +0 -218
- data/lib/data/udhr_txt/udhr_lua.txt +0 -219
- data/lib/data/udhr_txt/udhr_lue.txt +0 -217
- data/lib/data/udhr_txt/udhr_lug.txt +0 -216
- data/lib/data/udhr_txt/udhr_lun.txt +0 -216
- data/lib/data/udhr_txt/udhr_mad.txt +0 -223
- data/lib/data/udhr_txt/udhr_mag.txt +0 -220
- data/lib/data/udhr_txt/udhr_mah.txt +0 -220
- data/lib/data/udhr_txt/udhr_mai.txt +0 -223
- data/lib/data/udhr_txt/udhr_mal.txt +0 -210
- data/lib/data/udhr_txt/udhr_mam.txt +0 -218
- data/lib/data/udhr_txt/udhr_mar.txt +0 -219
- data/lib/data/udhr_txt/udhr_maz.txt +0 -218
- data/lib/data/udhr_txt/udhr_mcd.txt +0 -220
- data/lib/data/udhr_txt/udhr_mcf.txt +0 -223
- data/lib/data/udhr_txt/udhr_men.txt +0 -222
- data/lib/data/udhr_txt/udhr_mic.txt +0 -218
- data/lib/data/udhr_txt/udhr_min.txt +0 -221
- data/lib/data/udhr_txt/udhr_miq.txt +0 -213
- data/lib/data/udhr_txt/udhr_mkd.txt +0 -221
- data/lib/data/udhr_txt/udhr_mlt.txt +0 -217
- data/lib/data/udhr_txt/udhr_mly_arab.txt +0 -219
- data/lib/data/udhr_txt/udhr_mly_latn.txt +0 -218
- data/lib/data/udhr_txt/udhr_mos.txt +0 -216
- data/lib/data/udhr_txt/udhr_mri.txt +0 -219
- data/lib/data/udhr_txt/udhr_mxi.txt +0 -218
- data/lib/data/udhr_txt/udhr_mxv.txt +0 -223
- data/lib/data/udhr_txt/udhr_mya.txt +0 -219
- data/lib/data/udhr_txt/udhr_mzi.txt +0 -227
- data/lib/data/udhr_txt/udhr_nav.txt +0 -219
- data/lib/data/udhr_txt/udhr_nba.txt +0 -257
- data/lib/data/udhr_txt/udhr_nbl.txt +0 -218
- data/lib/data/udhr_txt/udhr_ndo.txt +0 -217
- data/lib/data/udhr_txt/udhr_nep.txt +0 -214
- data/lib/data/udhr_txt/udhr_nhn.txt +0 -221
- data/lib/data/udhr_txt/udhr_nld.txt +0 -217
- data/lib/data/udhr_txt/udhr_nno.txt +0 -219
- data/lib/data/udhr_txt/udhr_nob.txt +0 -225
- data/lib/data/udhr_txt/udhr_not.txt +0 -218
- data/lib/data/udhr_txt/udhr_nso.txt +0 -219
- data/lib/data/udhr_txt/udhr_nya_chechewa.txt +0 -221
- data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +0 -218
- data/lib/data/udhr_txt/udhr_nym.txt +0 -229
- data/lib/data/udhr_txt/udhr_nyn.txt +0 -213
- data/lib/data/udhr_txt/udhr_nzi.txt +0 -221
- data/lib/data/udhr_txt/udhr_ojb.txt +0 -221
- data/lib/data/udhr_txt/udhr_oss.txt +0 -214
- data/lib/data/udhr_txt/udhr_ote.txt +0 -218
- data/lib/data/udhr_txt/udhr_pam.txt +0 -225
- data/lib/data/udhr_txt/udhr_pan.txt +0 -227
- data/lib/data/udhr_txt/udhr_pau.txt +0 -219
- data/lib/data/udhr_txt/udhr_pbb.txt +0 -218
- data/lib/data/udhr_txt/udhr_pbu.txt +0 -9
- data/lib/data/udhr_txt/udhr_pcd.txt +0 -218
- data/lib/data/udhr_txt/udhr_pcm.txt +0 -218
- data/lib/data/udhr_txt/udhr_pes_1.txt +0 -218
- data/lib/data/udhr_txt/udhr_pes_2.txt +0 -222
- data/lib/data/udhr_txt/udhr_pis.txt +0 -219
- data/lib/data/udhr_txt/udhr_plt.txt +0 -214
- data/lib/data/udhr_txt/udhr_pnb.txt +0 -223
- data/lib/data/udhr_txt/udhr_pol.txt +0 -220
- data/lib/data/udhr_txt/udhr_pon.txt +0 -218
- data/lib/data/udhr_txt/udhr_por_BR.txt +0 -231
- data/lib/data/udhr_txt/udhr_por_PT.txt +0 -219
- data/lib/data/udhr_txt/udhr_pov.txt +0 -220
- data/lib/data/udhr_txt/udhr_ppl.txt +0 -219
- data/lib/data/udhr_txt/udhr_prq.txt +0 -151
- data/lib/data/udhr_txt/udhr_prv.txt +0 -207
- data/lib/data/udhr_txt/udhr_quc.txt +0 -217
- data/lib/data/udhr_txt/udhr_qud.txt +0 -218
- data/lib/data/udhr_txt/udhr_quy.txt +0 -221
- data/lib/data/udhr_txt/udhr_quz.txt +0 -223
- data/lib/data/udhr_txt/udhr_qva.txt +0 -219
- data/lib/data/udhr_txt/udhr_qvc.txt +0 -218
- data/lib/data/udhr_txt/udhr_qvh.txt +0 -217
- data/lib/data/udhr_txt/udhr_qvm.txt +0 -219
- data/lib/data/udhr_txt/udhr_qvn.txt +0 -217
- data/lib/data/udhr_txt/udhr_qwh.txt +0 -218
- data/lib/data/udhr_txt/udhr_qxa.txt +0 -217
- data/lib/data/udhr_txt/udhr_qxn.txt +0 -216
- data/lib/data/udhr_txt/udhr_qxu.txt +0 -221
- data/lib/data/udhr_txt/udhr_rar.txt +0 -220
- data/lib/data/udhr_txt/udhr_rmn.txt +0 -220
- data/lib/data/udhr_txt/udhr_rmn_1.txt +0 -221
- data/lib/data/udhr_txt/udhr_rmy.txt +0 -218
- data/lib/data/udhr_txt/udhr_roh.txt +0 -217
- data/lib/data/udhr_txt/udhr_ron_1953.txt +0 -218
- data/lib/data/udhr_txt/udhr_ron_1993.txt +0 -218
- data/lib/data/udhr_txt/udhr_ron_2006.txt +0 -218
- data/lib/data/udhr_txt/udhr_run.txt +0 -218
- data/lib/data/udhr_txt/udhr_rus.txt +0 -220
- data/lib/data/udhr_txt/udhr_sag.txt +0 -220
- data/lib/data/udhr_txt/udhr_san.txt +0 -219
- data/lib/data/udhr_txt/udhr_sco.txt +0 -222
- data/lib/data/udhr_txt/udhr_shp.txt +0 -224
- data/lib/data/udhr_txt/udhr_skr.txt +0 -225
- data/lib/data/udhr_txt/udhr_slk.txt +0 -219
- data/lib/data/udhr_txt/udhr_slv.txt +0 -218
- data/lib/data/udhr_txt/udhr_sme.txt +0 -220
- data/lib/data/udhr_txt/udhr_smo.txt +0 -226
- data/lib/data/udhr_txt/udhr_sna.txt +0 -223
- data/lib/data/udhr_txt/udhr_snk.txt +0 -220
- data/lib/data/udhr_txt/udhr_som.txt +0 -216
- data/lib/data/udhr_txt/udhr_sot.txt +0 -220
- data/lib/data/udhr_txt/udhr_spa.txt +0 -220
- data/lib/data/udhr_txt/udhr_src.txt +0 -220
- data/lib/data/udhr_txt/udhr_srp_cyrl.txt +0 -218
- data/lib/data/udhr_txt/udhr_srp_latn.txt +0 -218
- data/lib/data/udhr_txt/udhr_srr.txt +0 -219
- data/lib/data/udhr_txt/udhr_ssw.txt +0 -228
- data/lib/data/udhr_txt/udhr_suk.txt +0 -218
- data/lib/data/udhr_txt/udhr_sun.txt +0 -227
- data/lib/data/udhr_txt/udhr_sus.txt +0 -218
- data/lib/data/udhr_txt/udhr_swe.txt +0 -224
- data/lib/data/udhr_txt/udhr_swh.txt +0 -221
- data/lib/data/udhr_txt/udhr_tah.txt +0 -217
- data/lib/data/udhr_txt/udhr_taj.txt +0 -10
- data/lib/data/udhr_txt/udhr_tam.txt +0 -227
- data/lib/data/udhr_txt/udhr_tat.txt +0 -219
- data/lib/data/udhr_txt/udhr_tbz.txt +0 -219
- data/lib/data/udhr_txt/udhr_tca.txt +0 -219
- data/lib/data/udhr_txt/udhr_tem.txt +0 -216
- data/lib/data/udhr_txt/udhr_tet.txt +0 -219
- data/lib/data/udhr_txt/udhr_tgk.txt +0 -217
- data/lib/data/udhr_txt/udhr_tgl.txt +0 -224
- data/lib/data/udhr_txt/udhr_tgl_tglg.txt +0 -9
- data/lib/data/udhr_txt/udhr_tha.txt +0 -217
- data/lib/data/udhr_txt/udhr_tir.txt +0 -217
- data/lib/data/udhr_txt/udhr_tiv.txt +0 -232
- data/lib/data/udhr_txt/udhr_tob.txt +0 -218
- data/lib/data/udhr_txt/udhr_toi.txt +0 -216
- data/lib/data/udhr_txt/udhr_toj.txt +0 -219
- data/lib/data/udhr_txt/udhr_ton.txt +0 -221
- data/lib/data/udhr_txt/udhr_top.txt +0 -220
- data/lib/data/udhr_txt/udhr_tpi.txt +0 -219
- data/lib/data/udhr_txt/udhr_tsn.txt +0 -219
- data/lib/data/udhr_txt/udhr_tso_MZ.txt +0 -220
- data/lib/data/udhr_txt/udhr_tsz.txt +0 -218
- data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +0 -216
- data/lib/data/udhr_txt/udhr_tuk_latn.txt +0 -221
- data/lib/data/udhr_txt/udhr_tur.txt +0 -219
- data/lib/data/udhr_txt/udhr_tzc.txt +0 -219
- data/lib/data/udhr_txt/udhr_tzh.txt +0 -218
- data/lib/data/udhr_txt/udhr_tzm.txt +0 -220
- data/lib/data/udhr_txt/udhr_tzm_tfng.txt +0 -9
- data/lib/data/udhr_txt/udhr_uig_arab.txt +0 -219
- data/lib/data/udhr_txt/udhr_uig_latn.txt +0 -219
- data/lib/data/udhr_txt/udhr_ukr.txt +0 -218
- data/lib/data/udhr_txt/udhr_umb.txt +0 -218
- data/lib/data/udhr_txt/udhr_ura.txt +0 -219
- data/lib/data/udhr_txt/udhr_urd.txt +0 -9
- data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +0 -220
- data/lib/data/udhr_txt/udhr_uzn_latn.txt +0 -220
- data/lib/data/udhr_txt/udhr_vai.txt +0 -224
- data/lib/data/udhr_txt/udhr_vie.txt +0 -221
- data/lib/data/udhr_txt/udhr_vmw.txt +0 -220
- data/lib/data/udhr_txt/udhr_war.txt +0 -219
- data/lib/data/udhr_txt/udhr_wln.txt +0 -220
- data/lib/data/udhr_txt/udhr_wol.txt +0 -219
- data/lib/data/udhr_txt/udhr_wwa.txt +0 -109
- data/lib/data/udhr_txt/udhr_xho.txt +0 -219
- data/lib/data/udhr_txt/udhr_xsm.txt +0 -219
- data/lib/data/udhr_txt/udhr_yad.txt +0 -220
- data/lib/data/udhr_txt/udhr_yao.txt +0 -214
- data/lib/data/udhr_txt/udhr_yap.txt +0 -220
- data/lib/data/udhr_txt/udhr_ydd.txt +0 -223
- data/lib/data/udhr_txt/udhr_ykg.txt +0 -211
- data/lib/data/udhr_txt/udhr_yor.txt +0 -218
- data/lib/data/udhr_txt/udhr_yua.txt +0 -218
- data/lib/data/udhr_txt/udhr_zam.txt +0 -223
- data/lib/data/udhr_txt/udhr_ztu.txt +0 -219
- data/lib/data/udhr_txt/udhr_zul.txt +0 -219
- data/test/train.rb +0 -26
data/.document
ADDED
data/.gitignore
ADDED
data/README.markdown
CHANGED
@@ -1,18 +1,61 @@
|
|
1
1
|
#babel
|
2
2
|
|
3
3
|
Babel is a gem to identify in what language a text is written.
|
4
|
-
It is based on the n-gram approach by
|
4
|
+
It is based on the n-gram approach by Cavnar and Trenkle as described
|
5
|
+
in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
|
5
6
|
|
6
7
|
|
7
8
|
##usage
|
8
9
|
require 'rubygems'
|
9
|
-
require '
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
10
|
+
require 'babel'
|
11
|
+
|
12
|
+
def guess_language(s)
|
13
|
+
puts "'#{s}' is probably '#{s.language}'"
|
14
|
+
end
|
15
|
+
# load the default profiles
|
16
|
+
Babel.load_profiles
|
17
|
+
|
18
|
+
# Let's see what Babel thinks about these texts
|
19
|
+
guess_language 'Montags ist es ruhig'
|
20
|
+
guess_language 'le coq est mort'
|
14
21
|
|
15
|
-
|
22
|
+
# Replace a profile with my own profile
|
23
|
+
Babel.load_profile('/path/to/my/english/profile.yml')
|
24
|
+
|
25
|
+
# Merge profile data
|
26
|
+
Babel.load_profile('/path/to/my/other/english/profile.yml', :merge => true)
|
27
|
+
|
28
|
+
# Show Top-3 Languages for a sentence
|
29
|
+
puts "What language could this be written in?".languages[0..2]
|
30
|
+
|
31
|
+
##profiles
|
32
|
+
Profiles are collections of n-grams and the number of occurrences of each n-gram.
|
33
|
+
Babel uses n-grams with length 2-5 (bigram, trigram, tetragram, pentagram).
|
34
|
+
You can create your own profile and decide what n-grams to use and whether
|
35
|
+
you want to limit the profile or not.
|
36
|
+
|
37
|
+
These profiles are shipped with the gem:
|
38
|
+
* german (deu) (this profile is built from udhr_deu_1996.txt)
|
39
|
+
* english (eng)
|
40
|
+
* french (fra)
|
41
|
+
* spanish (spa)
|
42
|
+
* italian (ita)
|
43
|
+
|
44
|
+
Want another profile built in? Send an email to info@simplificator.com and if there are enough
|
45
|
+
requests we add the profile.
|
46
|
+
|
47
|
+
The profiles that are shipped with babel are based on the texts found at
|
48
|
+
http://www.unicode.org/udhr/index_by_code.html
|
49
|
+
|
50
|
+
##generating profiles
|
51
|
+
Profiles can be generated with the data found in http://www.unicode.org/udhr/assemblies/udhr_txt.zip or with any other text.
|
52
|
+
Once a profile is generated, Babel can store it in YAML format and load it again from YAML.
|
53
|
+
|
54
|
+
There is a rake task which simplifies profile generation:
|
55
|
+
rake babel:build_profile lang=foo file=myfile.txt dir=destination-directory
|
56
|
+
|
57
|
+
The file generated by this command can be loaded with
|
58
|
+
Babel.load_profile 'profile_foo.yml'
|
16
59
|
|
17
60
|
##Copyright
|
18
61
|
|
data/Rakefile
CHANGED
@@ -10,6 +10,9 @@ begin
|
|
10
10
|
gem.homepage = "http://github.com/simplificator/babel"
|
11
11
|
gem.authors = ["simplificator"]
|
12
12
|
gem.add_dependency('ya2yaml', '>= 0.2.6')
|
13
|
+
gem.files.exclude 'lib/data'
|
14
|
+
#gem.files.exclude 'lib/data/*.xml'
|
15
|
+
gem.files.include 'lib/data/*.zip'
|
13
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
14
17
|
end
|
15
18
|
rescue LoadError
|
@@ -53,4 +56,47 @@ Rake::RDocTask.new do |rdoc|
|
|
53
56
|
rdoc.rdoc_files.include('README*')
|
54
57
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
55
58
|
end
|
59
|
+
require 'rubygems'
|
60
|
+
require 'zip/zip'
|
61
|
+
require 'lib/babel'
|
62
|
+
|
63
|
+
namespace :babel do
|
64
|
+
task :unpack_data do
|
65
|
+
dir = File.join(File.dirname(__FILE__), 'lib', 'data')
|
66
|
+
file = File.join(dir, 'udhr_txt.zip')
|
67
|
+
Zip::ZipFile.open(file) do |zip|
|
68
|
+
zip.each do |entry|
|
69
|
+
destination = File.join(dir, entry.name)
|
70
|
+
FileUtils.mkdir_p(File.dirname(destination))
|
71
|
+
FileUtils.rm(destination) if File.exists?(destination)
|
72
|
+
zip.extract(entry, destination)
|
73
|
+
end
|
74
|
+
end
|
75
|
+
FileUtils.cp(File.join(dir, 'udhr_deu_1996.txt'), File.join(dir, 'udhr_deu.txt'))
|
76
|
+
end
|
77
|
+
|
78
|
+
task :build_profile do
|
79
|
+
if ENV['lang']
|
80
|
+
lang = ENV['lang']
|
81
|
+
file = ENV['file']
|
82
|
+
dir = ENV['dir'] || File.dirname(__FILE__)
|
83
|
+
skip = ENV['skip']
|
84
|
+
limit = ENV['limit']
|
85
|
+
unless file
|
86
|
+
skip ||= 5 # skip header in data files. english all the time
|
87
|
+
file = File.join(File.dirname(__FILE__), 'lib', 'data', "udhr_#{lang}.txt")
|
88
|
+
end
|
89
|
+
puts "Learning about #{lang} from #{file} and save it to #{dir}"
|
90
|
+
File.open(file, 'r') do |f|
|
91
|
+
f.each_with_index do |line, index|
|
92
|
+
if index > skip
|
93
|
+
Babel.learn(lang, line)
|
94
|
+
end
|
95
|
+
end
|
96
|
+
end
|
97
|
+
Babel.save_profile(lang, :dir => dir, :limit => limit)
|
98
|
+
end
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
56
102
|
|
data/VERSION.yml
CHANGED
data/babel.gemspec
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{babel}
|
5
|
+
s.version = "0.1.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["simplificator"]
|
9
|
+
s.date = %q{2009-07-13}
|
10
|
+
s.email = %q{info@simplificator.com}
|
11
|
+
s.extra_rdoc_files = [
|
12
|
+
"LICENSE",
|
13
|
+
"README.markdown"
|
14
|
+
]
|
15
|
+
s.files = [
|
16
|
+
".document",
|
17
|
+
".gitignore",
|
18
|
+
"LICENSE",
|
19
|
+
"README.markdown",
|
20
|
+
"Rakefile",
|
21
|
+
"VERSION.yml",
|
22
|
+
"babel.gemspec",
|
23
|
+
"lib/babel.rb",
|
24
|
+
"lib/babel/babel.rb",
|
25
|
+
"lib/babel/profile.rb",
|
26
|
+
"lib/babel/string_extensions.rb",
|
27
|
+
"lib/profiles/profile_deu.yml",
|
28
|
+
"lib/profiles/profile_eng.yml",
|
29
|
+
"lib/profiles/profile_fra.yml",
|
30
|
+
"lib/profiles/profile_ita.yml",
|
31
|
+
"lib/profiles/profile_spa.yml",
|
32
|
+
"lib/profiles/udhr_txt.zip",
|
33
|
+
"samples/guessing.rb",
|
34
|
+
"test/babel_test.rb",
|
35
|
+
"test/profile_test.rb",
|
36
|
+
"test/string_extensions_test.rb",
|
37
|
+
"test/test_helper.rb"
|
38
|
+
]
|
39
|
+
s.has_rdoc = true
|
40
|
+
s.homepage = %q{http://github.com/simplificator/babel}
|
41
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
42
|
+
s.require_paths = ["lib"]
|
43
|
+
s.rubygems_version = %q{1.3.2}
|
44
|
+
s.summary = %q{Utility to guess the language of a text}
|
45
|
+
s.test_files = [
|
46
|
+
"test/babel_test.rb",
|
47
|
+
"test/profile_test.rb",
|
48
|
+
"test/string_extensions_test.rb",
|
49
|
+
"test/test_helper.rb"
|
50
|
+
]
|
51
|
+
|
52
|
+
if s.respond_to? :specification_version then
|
53
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
54
|
+
s.specification_version = 3
|
55
|
+
|
56
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
57
|
+
s.add_runtime_dependency(%q<ya2yaml>, [">= 0.2.6"])
|
58
|
+
else
|
59
|
+
s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
|
60
|
+
end
|
61
|
+
else
|
62
|
+
s.add_dependency(%q<ya2yaml>, [">= 0.2.6"])
|
63
|
+
end
|
64
|
+
end
|
data/lib/babel/babel.rb
CHANGED
@@ -1,54 +1,115 @@
|
|
1
|
+
#
|
2
|
+
#
|
3
|
+
# Profile Generation:
|
4
|
+
# Whenever it's about generating a Profile (Babel.learn, Babel.distances and Babel.guess)
|
5
|
+
# you can pass
|
6
|
+
# * :min_length (2)
|
7
|
+
# * :max_length (5)
|
8
|
+
# * :pad (true)
|
9
|
+
# They are just forwarded to String#n_grams (default values in parentheses)
|
10
|
+
# It's highly recommended that you use the same settings for learning and guessing.
|
11
|
+
|
12
|
+
|
13
|
+
|
1
14
|
module Babel
|
2
15
|
@profiles = {}
|
3
16
|
PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')
|
17
|
+
|
18
|
+
# Learn that a text is in a given language.
|
19
|
+
# Calls Profile.learn for the profile with the given language.
|
4
20
|
def self.learn(lang, text, options = {})
|
5
21
|
lang = lang.to_s
|
6
|
-
profile = @profiles[lang] ||= Profile.new()
|
22
|
+
profile = @profiles[lang] ||= Profile.new(lang)
|
7
23
|
profile.learn(text, options)
|
8
24
|
end
|
9
25
|
|
10
|
-
|
26
|
+
# Clear all the profiles
|
11
27
|
def self.clear_profiles
|
12
28
|
@profiles = {}
|
13
29
|
end
|
30
|
+
# find the profile for a language
|
31
|
+
def self.profile(lang)
|
32
|
+
@profiles[lang]
|
33
|
+
end
|
14
34
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
35
|
+
# register a profile
|
36
|
+
# pass :merge => true to merge into an existing profile
|
37
|
+
def self.register_profile(profile, options = {})
|
38
|
+
if options[:merge] && @profiles[profile.language]
|
39
|
+
@profiles[profile.language].merge(profile)
|
40
|
+
else
|
41
|
+
@profiles[profile.language] = profile
|
19
42
|
end
|
20
|
-
found.first if found
|
21
43
|
end
|
22
44
|
|
23
|
-
#
|
24
|
-
|
25
|
-
|
26
|
-
|
45
|
+
# Guess the language of a text.
|
46
|
+
# As soon as there is at least one profile, this method always
|
47
|
+
# returns a value (perhaps the wrong one)...
|
48
|
+
# I.e. if only the "eng" profile is registered, then this method will always return "eng"
|
49
|
+
# no matter what text you pass
|
50
|
+
#
|
51
|
+
def self.guess(source, options = {})
|
52
|
+
distances = Babel.distances(source, options)
|
53
|
+
distances.first.first if distances.first
|
27
54
|
end
|
28
|
-
|
29
|
-
private
|
30
55
|
|
31
|
-
|
32
|
-
|
56
|
+
# Returns an array of [language, distance] pairs.
|
57
|
+
# The language with the shortest distance is the most probable solution.
|
58
|
+
# Sorted by distance, ascending (first item is most probable)
|
59
|
+
def self.distances(text, options = {})
|
60
|
+
source = Profile.new.learn(text, options)
|
61
|
+
@profiles.map { |lang, target| [lang, source.distance(target)] }.sort {|o1, o2| o1.last <=> o2.last}
|
33
62
|
end
|
34
63
|
|
35
|
-
|
64
|
+
|
65
|
+
# Load all the profiles from a given directory.
|
66
|
+
# Loads all .yml files so be careful what directory you specify.
|
67
|
+
# options are:
|
68
|
+
# * :dir the directory, defaults to Babel::PROFILE_DIR
|
69
|
+
# See Babel.load_profile() for other options
|
36
70
|
def self.load_profiles(options = {})
|
37
71
|
dir = options[:directory] || PROFILE_DIR
|
38
72
|
Dir[File.join(PROFILE_DIR, '*.yml')].each do |file|
|
39
|
-
file
|
40
|
-
@profiles[$1] = YAML.load_file(file)
|
73
|
+
Babel.load_profile(file, options)
|
41
74
|
end
|
42
75
|
end
|
43
76
|
|
77
|
+
# Load a single profile
|
78
|
+
# Options are:
|
79
|
+
# * :merge see Babel.register_profile for details
|
80
|
+
def self.load_profile(file, options = {})
|
81
|
+
Babel.register_profile(YAML.load_file(file), options)
|
82
|
+
end
|
83
|
+
|
84
|
+
# Save the profiles to a specified directory.
|
85
|
+
# See Babel.save_profile() for options
|
44
86
|
def self.save_profiles(options = {})
|
45
|
-
dir = options[:directory] || PROFILE_DIR
|
46
87
|
@profiles.each do |lang, profile|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
88
|
+
Babel.save_profile(lang, options)
|
89
|
+
end
|
90
|
+
end
|
91
|
+
|
92
|
+
# Save a specific profile
|
93
|
+
# Options are:
|
94
|
+
# * :dir -> the directory to save the files to. Defaults to Babel::PROFILE_DIR
|
95
|
+
# * :limit -> Call limit() on the profile before save. This reduces the size of the profile
|
96
|
+
# for the cost of (possibly) less accurate language guessing
|
97
|
+
def self.save_profile(lang, options = {})
|
98
|
+
dir = options[:dir] || PROFILE_DIR
|
99
|
+
profile = Babel.profile(lang)
|
100
|
+
profile.limit(options[:limit]) if options[:limit]
|
101
|
+
File.open(file_name(dir, lang), 'wb') do |file|
|
102
|
+
file.write(profile.ya2yaml)
|
51
103
|
end
|
104
|
+
end
|
105
|
+
|
106
|
+
private
|
107
|
+
|
108
|
+
# Build the file name for a profile file
|
109
|
+
# Naming scheme: profile_<LANG>.yml
|
110
|
+
def self.file_name(dir, lang)
|
111
|
+
File.join(dir, "profile_#{lang}.yml")
|
52
112
|
end
|
113
|
+
|
53
114
|
end
|
54
115
|
|
data/lib/babel/profile.rb
CHANGED
@@ -1,43 +1,61 @@
|
|
1
1
|
module Babel
|
2
2
|
class Profile
|
3
|
-
|
4
|
-
|
3
|
+
attr_reader :language
|
4
|
+
attr_reader :data
|
5
|
+
def initialize(language = nil)
|
6
|
+
@data = {}
|
5
7
|
@total_occurences = 0
|
8
|
+
@language = language
|
6
9
|
end
|
7
10
|
|
11
|
+
|
12
|
+
# learn a text
|
13
|
+
# following options are used when generating the n-grams:
|
14
|
+
# * min_length => 2
|
15
|
+
# * max_length => 5
|
16
|
+
# * pad => true
|
8
17
|
def learn(text, options = {})
|
9
18
|
options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
|
10
19
|
text = clean(text)
|
11
20
|
text.split(' ').each do |word|
|
12
|
-
|
13
|
-
ngrams.each do |ngram|
|
21
|
+
word.n_grams(options).each do |ngram|
|
14
22
|
self.occured(ngram)
|
15
23
|
end
|
16
24
|
end
|
25
|
+
# after learning rank the new n-grams
|
17
26
|
self.rank
|
18
27
|
self # return self so we can chain learn commans. profile.learn('asasas').learn('asdsad')
|
19
28
|
end
|
20
29
|
|
21
30
|
|
31
|
+
def merge(other)
|
32
|
+
if self.language != other.language
|
33
|
+
raise ArgumentError.new("self has a language of #{self.language} but profile to merge has #{other.language}")
|
34
|
+
end
|
35
|
+
other.data.each do |key, value|
|
36
|
+
self.occured(key, value.first)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
22
40
|
# TODO: needed?
|
23
41
|
def clean(text)
|
24
42
|
return text
|
25
|
-
text = text.gsub(
|
26
|
-
text = text.gsub('.', '')
|
27
|
-
text = text.gsub(';', '')
|
43
|
+
text = text.gsub(/[0-9]/, '')
|
28
44
|
text = text.gsub(':', '')
|
45
|
+
text = text.gsub('/', '')
|
46
|
+
text = text.gsub('_', '')
|
29
47
|
text = text.gsub('(', '')
|
30
48
|
text = text.gsub(')', '')
|
31
|
-
text = text.gsub('
|
32
|
-
text = text.gsub(
|
33
|
-
|
34
|
-
text
|
49
|
+
text = text.gsub(';', '')
|
50
|
+
text = text.gsub('?', '')
|
51
|
+
|
52
|
+
return text
|
35
53
|
end
|
54
|
+
|
36
55
|
# limit this profile to n items
|
37
56
|
# profile needs to be ranked first
|
38
|
-
# do not use this if you plan to extend the profile later on
|
39
57
|
def limit(boundary = 100)
|
40
|
-
@
|
58
|
+
@data.reject! do |key, value|
|
41
59
|
raise 'Please call rank() first' if value.last == 0
|
42
60
|
boundary < value.last
|
43
61
|
end
|
@@ -46,37 +64,37 @@ module Babel
|
|
46
64
|
# rank the current profile
|
47
65
|
# ngrams are sorted by occurence and then ranked
|
48
66
|
def rank
|
49
|
-
|
50
|
-
|
51
|
-
end.each_with_index do |item, index|
|
52
|
-
|
53
|
-
end
|
67
|
+
#@data.values.sort do |o1, o2|
|
68
|
+
# o2.first <=> o1.first
|
69
|
+
#end.each_with_index do |item, index|
|
70
|
+
# item[1] = index + 1
|
71
|
+
#end
|
54
72
|
|
55
|
-
@
|
73
|
+
@data.values.each do |value|
|
56
74
|
value[1] = value[0] / @total_occurences.to_f
|
57
75
|
end
|
58
76
|
end
|
59
77
|
|
60
|
-
# Called when a
|
78
|
+
# Called when an n-gram has occurred; optionally you can pass an
|
61
79
|
# amount (how many times the ngram occurred)
|
62
80
|
def occured(ngram, amount = 1)
|
63
|
-
(@
|
81
|
+
(@data[ngram] ||= [0, 0])[0] += amount
|
64
82
|
@total_occurences += amount
|
65
83
|
end
|
66
84
|
|
67
85
|
# find the occurrence of an ngram. if it never occurred, returns 0
|
68
86
|
def occurence(ngram)
|
69
|
-
@
|
87
|
+
@data[ngram] ? @data[ngram].first : 0
|
70
88
|
end
|
71
89
|
|
72
90
|
# find the ranking of a ngram. if it is not yet ranked, return 0
|
73
91
|
def ranking(ngram)
|
74
|
-
@
|
92
|
+
@data[ngram] ? @data[ngram].last : 0
|
75
93
|
end
|
76
94
|
|
77
95
|
# Calculate the distance to another profile
|
78
96
|
def distance(other)
|
79
|
-
@
|
97
|
+
@data.inject(0) do |memo, item|
|
80
98
|
other_ranking = other.ranking(item.first)
|
81
99
|
if other_ranking == 0
|
82
100
|
memo += 1
|
@@ -88,7 +106,7 @@ module Babel
|
|
88
106
|
|
89
107
|
|
90
108
|
def to_s
|
91
|
-
@
|
109
|
+
@data.inspect
|
92
110
|
end
|
93
111
|
end
|
94
112
|
end
|
@@ -1,15 +1,19 @@
|
|
1
1
|
class String
|
2
|
-
|
3
|
-
#
|
4
|
-
|
2
|
+
# Generate n-grams for a string.
|
3
|
+
# options are:
|
4
|
+
# :min_length : minimum length of the n-grams (defaults to 1)
|
5
|
+
# :max_length : maximum length of the n-grams (defaults to self.length)
|
6
|
+
# :pad : pad with '_' to generate all possible n-grams (defaults to false)
|
7
|
+
def n_grams(options = {})
|
8
|
+
# TODO: recursive?
|
9
|
+
# TODO: use min/max length for loop index instead of looping
|
10
|
+
# all and then use if test to decide if to add or not
|
5
11
|
min_length = options[:min_length] || 1
|
6
12
|
max_length = options[:max_length] || self.length
|
7
13
|
pad = options[:pad] || false
|
8
14
|
value = options[:preserve_case] ? self : self.downcase
|
9
15
|
value = "_#{value}#{'_' * (value.length - 1)}" if pad
|
10
16
|
res = []
|
11
|
-
# TODO: use min/max length for loop index instead of looping
|
12
|
-
# all and then use if test to decide if to add or not
|
13
17
|
0.upto(value.length - 1) do |index|
|
14
18
|
index.upto(value.length - 1) do |len|
|
15
19
|
if value[index..len].length >= min_length && value[index..len].length <= max_length
|
@@ -20,22 +24,23 @@ class String
|
|
20
24
|
res
|
21
25
|
end
|
22
26
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
# value = options[:preserve_case] ? self : self.downcase
|
27
|
-
# res = []
|
28
|
-
#
|
29
|
-
# end
|
30
|
-
|
31
|
-
|
32
|
-
# Ask Babel about the language of this text
|
33
|
-
# Can return nil if no language found
|
27
|
+
# Ask Babel about the language of this text.
|
28
|
+
# Convenience method, just calls Babel.guess().
|
29
|
+
# See Babel.guess for description.
|
34
30
|
def language(options = {})
|
35
31
|
Babel.guess(self, options)
|
36
32
|
end
|
33
|
+
# Ask Babel about the languages this text could be.
|
34
|
+
# It will return all the registered languages with the most probable
|
35
|
+
# Language first. You might want to restrict this before presenting to
|
36
|
+
# the user.
|
37
|
+
def languages(options = {})
|
38
|
+
Babel.distances(self, options).map() {|item| item.first}
|
39
|
+
end
|
37
40
|
|
38
|
-
# Tell Babel that this text is in a given language
|
41
|
+
# Tell Babel that this text is in a given language.
|
42
|
+
# Convenience method, just calls Babel.learn().
|
43
|
+
# See Babel.learn for description
|
39
44
|
def language=(lang, options = {})
|
40
45
|
Babel.learn(lang, self, options)
|
41
46
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
--- !ruby/object:Babel::Profile
|
2
|
-
|
2
|
+
data:
|
3
3
|
(i:
|
4
4
|
- 1
|
5
5
|
- 1.48557506610809e-05
|
@@ -25359,4 +25359,5 @@
|
|
25359
25359
|
ützu:
|
25360
25360
|
- 1
|
25361
25361
|
- 1.48557506610809e-05
|
25362
|
+
language: deu
|
25362
25363
|
total_occurences: 67314
|
@@ -1,5 +1,5 @@
|
|
1
1
|
--- !ruby/object:Babel::Profile
|
2
|
-
|
2
|
+
data:
|
3
3
|
? ",_"
|
4
4
|
:
|
5
5
|
- 94
|
@@ -20791,4 +20791,5 @@
|
|
20791
20791
|
‐se:
|
20792
20792
|
- 1
|
20793
20793
|
- 1.85742412422453e-05
|
20794
|
+
language: eng
|
20794
20795
|
total_occurences: 53838
|
@@ -1,5 +1,5 @@
|
|
1
1
|
--- !ruby/object:Babel::Profile
|
2
|
-
|
2
|
+
data:
|
3
3
|
? ",_"
|
4
4
|
:
|
5
5
|
- 118
|
@@ -24961,4 +24961,5 @@
|
|
24961
24961
|
’é:
|
24962
24962
|
- 8
|
24963
24963
|
- 0.000123525415354209
|
24964
|
+
language: fra
|
24964
24965
|
total_occurences: 64764
|