simplificator-babel 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (375) hide show
  1. data/LICENSE +20 -0
  2. data/README.markdown +19 -0
  3. data/Rakefile +56 -0
  4. data/VERSION.yml +4 -0
  5. data/lib/babel/babel.rb +54 -0
  6. data/lib/babel/profile.rb +94 -0
  7. data/lib/babel/string_extensions.rb +42 -0
  8. data/lib/babel.rb +10 -0
  9. data/lib/data/udhr_txt/index.xml +385 -0
  10. data/lib/data/udhr_txt/udhr_007.txt +220 -0
  11. data/lib/data/udhr_txt/udhr_008.txt +220 -0
  12. data/lib/data/udhr_txt/udhr_009.txt +228 -0
  13. data/lib/data/udhr_txt/udhr_010.txt +219 -0
  14. data/lib/data/udhr_txt/udhr_011.txt +232 -0
  15. data/lib/data/udhr_txt/udhr_abk.txt +218 -0
  16. data/lib/data/udhr_txt/udhr_ace.txt +221 -0
  17. data/lib/data/udhr_txt/udhr_acu.txt +222 -0
  18. data/lib/data/udhr_txt/udhr_ada.txt +220 -0
  19. data/lib/data/udhr_txt/udhr_afr.txt +219 -0
  20. data/lib/data/udhr_txt/udhr_agr.txt +219 -0
  21. data/lib/data/udhr_txt/udhr_aii.txt +216 -0
  22. data/lib/data/udhr_txt/udhr_ajg.txt +219 -0
  23. data/lib/data/udhr_txt/udhr_aka_akuapem.txt +221 -0
  24. data/lib/data/udhr_txt/udhr_aka_asante.txt +220 -0
  25. data/lib/data/udhr_txt/udhr_aka_fante.txt +219 -0
  26. data/lib/data/udhr_txt/udhr_als.txt +220 -0
  27. data/lib/data/udhr_txt/udhr_amc.txt +215 -0
  28. data/lib/data/udhr_txt/udhr_ame.txt +222 -0
  29. data/lib/data/udhr_txt/udhr_amh.txt +209 -0
  30. data/lib/data/udhr_txt/udhr_amr.txt +221 -0
  31. data/lib/data/udhr_txt/udhr_arb.txt +220 -0
  32. data/lib/data/udhr_txt/udhr_arl.txt +222 -0
  33. data/lib/data/udhr_txt/udhr_arn.txt +218 -0
  34. data/lib/data/udhr_txt/udhr_ast.txt +221 -0
  35. data/lib/data/udhr_txt/udhr_auv.txt +217 -0
  36. data/lib/data/udhr_txt/udhr_ayr.txt +218 -0
  37. data/lib/data/udhr_txt/udhr_azj_cyrl.txt +218 -0
  38. data/lib/data/udhr_txt/udhr_azj_latn.txt +218 -0
  39. data/lib/data/udhr_txt/udhr_bam.txt +218 -0
  40. data/lib/data/udhr_txt/udhr_ban.txt +222 -0
  41. data/lib/data/udhr_txt/udhr_bba.txt +218 -0
  42. data/lib/data/udhr_txt/udhr_bci.txt +217 -0
  43. data/lib/data/udhr_txt/udhr_bcl.txt +219 -0
  44. data/lib/data/udhr_txt/udhr_bel.txt +221 -0
  45. data/lib/data/udhr_txt/udhr_bem.txt +217 -0
  46. data/lib/data/udhr_txt/udhr_ben.txt +222 -0
  47. data/lib/data/udhr_txt/udhr_bho.txt +219 -0
  48. data/lib/data/udhr_txt/udhr_bin.txt +232 -0
  49. data/lib/data/udhr_txt/udhr_bis.txt +218 -0
  50. data/lib/data/udhr_txt/udhr_blu.txt +219 -0
  51. data/lib/data/udhr_txt/udhr_boa.txt +223 -0
  52. data/lib/data/udhr_txt/udhr_bod.txt +221 -0
  53. data/lib/data/udhr_txt/udhr_bos_cyrl.txt +220 -0
  54. data/lib/data/udhr_txt/udhr_bos_latn.txt +220 -0
  55. data/lib/data/udhr_txt/udhr_bre.txt +222 -0
  56. data/lib/data/udhr_txt/udhr_btb.txt +217 -0
  57. data/lib/data/udhr_txt/udhr_bug.txt +222 -0
  58. data/lib/data/udhr_txt/udhr_bul.txt +218 -0
  59. data/lib/data/udhr_txt/udhr_cab.txt +221 -0
  60. data/lib/data/udhr_txt/udhr_cak.txt +217 -0
  61. data/lib/data/udhr_txt/udhr_cat.txt +220 -0
  62. data/lib/data/udhr_txt/udhr_cbr.txt +219 -0
  63. data/lib/data/udhr_txt/udhr_cbs.txt +153 -0
  64. data/lib/data/udhr_txt/udhr_cbt.txt +220 -0
  65. data/lib/data/udhr_txt/udhr_cbu.txt +218 -0
  66. data/lib/data/udhr_txt/udhr_ccx.txt +222 -0
  67. data/lib/data/udhr_txt/udhr_ceb.txt +218 -0
  68. data/lib/data/udhr_txt/udhr_ces.txt +221 -0
  69. data/lib/data/udhr_txt/udhr_cha.txt +219 -0
  70. data/lib/data/udhr_txt/udhr_chj.txt +220 -0
  71. data/lib/data/udhr_txt/udhr_chk.txt +220 -0
  72. data/lib/data/udhr_txt/udhr_chr.txt +10 -0
  73. data/lib/data/udhr_txt/udhr_cic.txt +220 -0
  74. data/lib/data/udhr_txt/udhr_cjk.txt +218 -0
  75. data/lib/data/udhr_txt/udhr_cjk_AO.txt +220 -0
  76. data/lib/data/udhr_txt/udhr_ckb.txt +217 -0
  77. data/lib/data/udhr_txt/udhr_cmn_hans.txt +220 -0
  78. data/lib/data/udhr_txt/udhr_cmn_hant.txt +220 -0
  79. data/lib/data/udhr_txt/udhr_cnh.txt +220 -0
  80. data/lib/data/udhr_txt/udhr_cni.txt +220 -0
  81. data/lib/data/udhr_txt/udhr_cos.txt +218 -0
  82. data/lib/data/udhr_txt/udhr_cot.txt +222 -0
  83. data/lib/data/udhr_txt/udhr_cpu.txt +219 -0
  84. data/lib/data/udhr_txt/udhr_crs.txt +217 -0
  85. data/lib/data/udhr_txt/udhr_csa.txt +223 -0
  86. data/lib/data/udhr_txt/udhr_csw.txt +163 -0
  87. data/lib/data/udhr_txt/udhr_ctd.txt +222 -0
  88. data/lib/data/udhr_txt/udhr_cym.txt +222 -0
  89. data/lib/data/udhr_txt/udhr_dag.txt +217 -0
  90. data/lib/data/udhr_txt/udhr_dan.txt +224 -0
  91. data/lib/data/udhr_txt/udhr_ddn.txt +217 -0
  92. data/lib/data/udhr_txt/udhr_deu_1901.txt +220 -0
  93. data/lib/data/udhr_txt/udhr_deu_1996.txt +220 -0
  94. data/lib/data/udhr_txt/udhr_dga.txt +220 -0
  95. data/lib/data/udhr_txt/udhr_dip.txt +217 -0
  96. data/lib/data/udhr_txt/udhr_div.txt +220 -0
  97. data/lib/data/udhr_txt/udhr_dyo.txt +217 -0
  98. data/lib/data/udhr_txt/udhr_dzo.txt +9 -0
  99. data/lib/data/udhr_txt/udhr_ell_monotonic.txt +220 -0
  100. data/lib/data/udhr_txt/udhr_ell_polytonic.txt +220 -0
  101. data/lib/data/udhr_txt/udhr_emk.txt +218 -0
  102. data/lib/data/udhr_txt/udhr_eml.txt +219 -0
  103. data/lib/data/udhr_txt/udhr_eng.txt +219 -0
  104. data/lib/data/udhr_txt/udhr_epo.txt +221 -0
  105. data/lib/data/udhr_txt/udhr_est.txt +219 -0
  106. data/lib/data/udhr_txt/udhr_eus.txt +220 -0
  107. data/lib/data/udhr_txt/udhr_eve.txt +207 -0
  108. data/lib/data/udhr_txt/udhr_ewe.txt +218 -0
  109. data/lib/data/udhr_txt/udhr_fao.txt +219 -0
  110. data/lib/data/udhr_txt/udhr_fij.txt +224 -0
  111. data/lib/data/udhr_txt/udhr_fin.txt +224 -0
  112. data/lib/data/udhr_txt/udhr_flm.txt +219 -0
  113. data/lib/data/udhr_txt/udhr_fon.txt +217 -0
  114. data/lib/data/udhr_txt/udhr_fra.txt +218 -0
  115. data/lib/data/udhr_txt/udhr_fri.txt +219 -0
  116. data/lib/data/udhr_txt/udhr_fuc.txt +217 -0
  117. data/lib/data/udhr_txt/udhr_fur.txt +220 -0
  118. data/lib/data/udhr_txt/udhr_gaa.txt +220 -0
  119. data/lib/data/udhr_txt/udhr_gag.txt +223 -0
  120. data/lib/data/udhr_txt/udhr_gax.txt +222 -0
  121. data/lib/data/udhr_txt/udhr_gjn.txt +220 -0
  122. data/lib/data/udhr_txt/udhr_gkp.txt +216 -0
  123. data/lib/data/udhr_txt/udhr_gla.txt +229 -0
  124. data/lib/data/udhr_txt/udhr_gle.txt +215 -0
  125. data/lib/data/udhr_txt/udhr_glg.txt +217 -0
  126. data/lib/data/udhr_txt/udhr_guc.txt +221 -0
  127. data/lib/data/udhr_txt/udhr_gug.txt +210 -0
  128. data/lib/data/udhr_txt/udhr_guj.txt +219 -0
  129. data/lib/data/udhr_txt/udhr_gyr.txt +203 -0
  130. data/lib/data/udhr_txt/udhr_hat_kreyol.txt +221 -0
  131. data/lib/data/udhr_txt/udhr_hat_popular.txt +221 -0
  132. data/lib/data/udhr_txt/udhr_hau_NE.txt +219 -0
  133. data/lib/data/udhr_txt/udhr_hau_NG.txt +219 -0
  134. data/lib/data/udhr_txt/udhr_haw.txt +219 -0
  135. data/lib/data/udhr_txt/udhr_hea.txt +219 -0
  136. data/lib/data/udhr_txt/udhr_heb.txt +216 -0
  137. data/lib/data/udhr_txt/udhr_hil.txt +217 -0
  138. data/lib/data/udhr_txt/udhr_hin.txt +222 -0
  139. data/lib/data/udhr_txt/udhr_hms.txt +219 -0
  140. data/lib/data/udhr_txt/udhr_hna.txt +217 -0
  141. data/lib/data/udhr_txt/udhr_hni.txt +218 -0
  142. data/lib/data/udhr_txt/udhr_hrv.txt +218 -0
  143. data/lib/data/udhr_txt/udhr_hsb.txt +220 -0
  144. data/lib/data/udhr_txt/udhr_hun.txt +218 -0
  145. data/lib/data/udhr_txt/udhr_hus.txt +222 -0
  146. data/lib/data/udhr_txt/udhr_huu.txt +220 -0
  147. data/lib/data/udhr_txt/udhr_hva.txt +220 -0
  148. data/lib/data/udhr_txt/udhr_hye.txt +234 -0
  149. data/lib/data/udhr_txt/udhr_ibb.txt +235 -0
  150. data/lib/data/udhr_txt/udhr_ibo.txt +219 -0
  151. data/lib/data/udhr_txt/udhr_ido.txt +224 -0
  152. data/lib/data/udhr_txt/udhr_iii.txt +9 -0
  153. data/lib/data/udhr_txt/udhr_ike.txt +163 -0
  154. data/lib/data/udhr_txt/udhr_ilo.txt +217 -0
  155. data/lib/data/udhr_txt/udhr_ina.txt +220 -0
  156. data/lib/data/udhr_txt/udhr_ind.txt +219 -0
  157. data/lib/data/udhr_txt/udhr_isl.txt +217 -0
  158. data/lib/data/udhr_txt/udhr_ita.txt +221 -0
  159. data/lib/data/udhr_txt/udhr_jav.txt +222 -0
  160. data/lib/data/udhr_txt/udhr_jpn.txt +219 -0
  161. data/lib/data/udhr_txt/udhr_kal.txt +218 -0
  162. data/lib/data/udhr_txt/udhr_kan.txt +216 -0
  163. data/lib/data/udhr_txt/udhr_kat.txt +221 -0
  164. data/lib/data/udhr_txt/udhr_kaz.txt +218 -0
  165. data/lib/data/udhr_txt/udhr_kbp.txt +218 -0
  166. data/lib/data/udhr_txt/udhr_kde.txt +212 -0
  167. data/lib/data/udhr_txt/udhr_kea.txt +219 -0
  168. data/lib/data/udhr_txt/udhr_kek.txt +219 -0
  169. data/lib/data/udhr_txt/udhr_khk.txt +217 -0
  170. data/lib/data/udhr_txt/udhr_khk_mong.txt +11 -0
  171. data/lib/data/udhr_txt/udhr_khm.txt +220 -0
  172. data/lib/data/udhr_txt/udhr_kin.txt +220 -0
  173. data/lib/data/udhr_txt/udhr_kir.txt +220 -0
  174. data/lib/data/udhr_txt/udhr_kmb.txt +219 -0
  175. data/lib/data/udhr_txt/udhr_knc.txt +230 -0
  176. data/lib/data/udhr_txt/udhr_kng.txt +219 -0
  177. data/lib/data/udhr_txt/udhr_kng_AO.txt +219 -0
  178. data/lib/data/udhr_txt/udhr_koo.txt +216 -0
  179. data/lib/data/udhr_txt/udhr_kor.txt +219 -0
  180. data/lib/data/udhr_txt/udhr_kqn.txt +218 -0
  181. data/lib/data/udhr_txt/udhr_kri.txt +226 -0
  182. data/lib/data/udhr_txt/udhr_ktu.txt +219 -0
  183. data/lib/data/udhr_txt/udhr_lao.txt +223 -0
  184. data/lib/data/udhr_txt/udhr_lat.txt +221 -0
  185. data/lib/data/udhr_txt/udhr_lat_1.txt +220 -0
  186. data/lib/data/udhr_txt/udhr_lav.txt +220 -0
  187. data/lib/data/udhr_txt/udhr_lia.txt +218 -0
  188. data/lib/data/udhr_txt/udhr_lin.txt +217 -0
  189. data/lib/data/udhr_txt/udhr_lin_tones.txt +214 -0
  190. data/lib/data/udhr_txt/udhr_lit.txt +218 -0
  191. data/lib/data/udhr_txt/udhr_lnc.txt +219 -0
  192. data/lib/data/udhr_txt/udhr_lns.txt +219 -0
  193. data/lib/data/udhr_txt/udhr_loz.txt +219 -0
  194. data/lib/data/udhr_txt/udhr_ltz.txt +218 -0
  195. data/lib/data/udhr_txt/udhr_lua.txt +219 -0
  196. data/lib/data/udhr_txt/udhr_lue.txt +217 -0
  197. data/lib/data/udhr_txt/udhr_lug.txt +216 -0
  198. data/lib/data/udhr_txt/udhr_lun.txt +216 -0
  199. data/lib/data/udhr_txt/udhr_mad.txt +223 -0
  200. data/lib/data/udhr_txt/udhr_mag.txt +220 -0
  201. data/lib/data/udhr_txt/udhr_mah.txt +220 -0
  202. data/lib/data/udhr_txt/udhr_mai.txt +223 -0
  203. data/lib/data/udhr_txt/udhr_mal.txt +210 -0
  204. data/lib/data/udhr_txt/udhr_mam.txt +218 -0
  205. data/lib/data/udhr_txt/udhr_mar.txt +219 -0
  206. data/lib/data/udhr_txt/udhr_maz.txt +218 -0
  207. data/lib/data/udhr_txt/udhr_mcd.txt +220 -0
  208. data/lib/data/udhr_txt/udhr_mcf.txt +223 -0
  209. data/lib/data/udhr_txt/udhr_men.txt +222 -0
  210. data/lib/data/udhr_txt/udhr_mic.txt +218 -0
  211. data/lib/data/udhr_txt/udhr_min.txt +221 -0
  212. data/lib/data/udhr_txt/udhr_miq.txt +213 -0
  213. data/lib/data/udhr_txt/udhr_mkd.txt +221 -0
  214. data/lib/data/udhr_txt/udhr_mlt.txt +217 -0
  215. data/lib/data/udhr_txt/udhr_mly_arab.txt +219 -0
  216. data/lib/data/udhr_txt/udhr_mly_latn.txt +218 -0
  217. data/lib/data/udhr_txt/udhr_mos.txt +216 -0
  218. data/lib/data/udhr_txt/udhr_mri.txt +219 -0
  219. data/lib/data/udhr_txt/udhr_mxi.txt +218 -0
  220. data/lib/data/udhr_txt/udhr_mxv.txt +223 -0
  221. data/lib/data/udhr_txt/udhr_mya.txt +219 -0
  222. data/lib/data/udhr_txt/udhr_mzi.txt +227 -0
  223. data/lib/data/udhr_txt/udhr_nav.txt +219 -0
  224. data/lib/data/udhr_txt/udhr_nba.txt +257 -0
  225. data/lib/data/udhr_txt/udhr_nbl.txt +218 -0
  226. data/lib/data/udhr_txt/udhr_ndo.txt +217 -0
  227. data/lib/data/udhr_txt/udhr_nep.txt +214 -0
  228. data/lib/data/udhr_txt/udhr_nhn.txt +221 -0
  229. data/lib/data/udhr_txt/udhr_nld.txt +217 -0
  230. data/lib/data/udhr_txt/udhr_nno.txt +219 -0
  231. data/lib/data/udhr_txt/udhr_nob.txt +225 -0
  232. data/lib/data/udhr_txt/udhr_not.txt +218 -0
  233. data/lib/data/udhr_txt/udhr_nso.txt +219 -0
  234. data/lib/data/udhr_txt/udhr_nya_chechewa.txt +221 -0
  235. data/lib/data/udhr_txt/udhr_nya_chinyanja.txt +218 -0
  236. data/lib/data/udhr_txt/udhr_nym.txt +229 -0
  237. data/lib/data/udhr_txt/udhr_nyn.txt +213 -0
  238. data/lib/data/udhr_txt/udhr_nzi.txt +221 -0
  239. data/lib/data/udhr_txt/udhr_ojb.txt +221 -0
  240. data/lib/data/udhr_txt/udhr_oss.txt +214 -0
  241. data/lib/data/udhr_txt/udhr_ote.txt +218 -0
  242. data/lib/data/udhr_txt/udhr_pam.txt +225 -0
  243. data/lib/data/udhr_txt/udhr_pan.txt +227 -0
  244. data/lib/data/udhr_txt/udhr_pau.txt +219 -0
  245. data/lib/data/udhr_txt/udhr_pbb.txt +218 -0
  246. data/lib/data/udhr_txt/udhr_pbu.txt +9 -0
  247. data/lib/data/udhr_txt/udhr_pcd.txt +218 -0
  248. data/lib/data/udhr_txt/udhr_pcm.txt +218 -0
  249. data/lib/data/udhr_txt/udhr_pes_1.txt +218 -0
  250. data/lib/data/udhr_txt/udhr_pes_2.txt +222 -0
  251. data/lib/data/udhr_txt/udhr_pis.txt +219 -0
  252. data/lib/data/udhr_txt/udhr_plt.txt +214 -0
  253. data/lib/data/udhr_txt/udhr_pnb.txt +223 -0
  254. data/lib/data/udhr_txt/udhr_pol.txt +220 -0
  255. data/lib/data/udhr_txt/udhr_pon.txt +218 -0
  256. data/lib/data/udhr_txt/udhr_por_BR.txt +231 -0
  257. data/lib/data/udhr_txt/udhr_por_PT.txt +219 -0
  258. data/lib/data/udhr_txt/udhr_pov.txt +220 -0
  259. data/lib/data/udhr_txt/udhr_ppl.txt +219 -0
  260. data/lib/data/udhr_txt/udhr_prq.txt +151 -0
  261. data/lib/data/udhr_txt/udhr_prv.txt +207 -0
  262. data/lib/data/udhr_txt/udhr_quc.txt +217 -0
  263. data/lib/data/udhr_txt/udhr_qud.txt +218 -0
  264. data/lib/data/udhr_txt/udhr_quy.txt +221 -0
  265. data/lib/data/udhr_txt/udhr_quz.txt +223 -0
  266. data/lib/data/udhr_txt/udhr_qva.txt +219 -0
  267. data/lib/data/udhr_txt/udhr_qvc.txt +218 -0
  268. data/lib/data/udhr_txt/udhr_qvh.txt +217 -0
  269. data/lib/data/udhr_txt/udhr_qvm.txt +219 -0
  270. data/lib/data/udhr_txt/udhr_qvn.txt +217 -0
  271. data/lib/data/udhr_txt/udhr_qwh.txt +218 -0
  272. data/lib/data/udhr_txt/udhr_qxa.txt +217 -0
  273. data/lib/data/udhr_txt/udhr_qxn.txt +216 -0
  274. data/lib/data/udhr_txt/udhr_qxu.txt +221 -0
  275. data/lib/data/udhr_txt/udhr_rar.txt +220 -0
  276. data/lib/data/udhr_txt/udhr_rmn.txt +220 -0
  277. data/lib/data/udhr_txt/udhr_rmn_1.txt +221 -0
  278. data/lib/data/udhr_txt/udhr_rmy.txt +218 -0
  279. data/lib/data/udhr_txt/udhr_roh.txt +217 -0
  280. data/lib/data/udhr_txt/udhr_ron_1953.txt +218 -0
  281. data/lib/data/udhr_txt/udhr_ron_1993.txt +218 -0
  282. data/lib/data/udhr_txt/udhr_ron_2006.txt +218 -0
  283. data/lib/data/udhr_txt/udhr_run.txt +218 -0
  284. data/lib/data/udhr_txt/udhr_rus.txt +220 -0
  285. data/lib/data/udhr_txt/udhr_sag.txt +220 -0
  286. data/lib/data/udhr_txt/udhr_san.txt +219 -0
  287. data/lib/data/udhr_txt/udhr_sco.txt +222 -0
  288. data/lib/data/udhr_txt/udhr_shp.txt +224 -0
  289. data/lib/data/udhr_txt/udhr_skr.txt +225 -0
  290. data/lib/data/udhr_txt/udhr_slk.txt +219 -0
  291. data/lib/data/udhr_txt/udhr_slv.txt +218 -0
  292. data/lib/data/udhr_txt/udhr_sme.txt +220 -0
  293. data/lib/data/udhr_txt/udhr_smo.txt +226 -0
  294. data/lib/data/udhr_txt/udhr_sna.txt +223 -0
  295. data/lib/data/udhr_txt/udhr_snk.txt +220 -0
  296. data/lib/data/udhr_txt/udhr_som.txt +216 -0
  297. data/lib/data/udhr_txt/udhr_sot.txt +220 -0
  298. data/lib/data/udhr_txt/udhr_spa.txt +220 -0
  299. data/lib/data/udhr_txt/udhr_src.txt +220 -0
  300. data/lib/data/udhr_txt/udhr_srp_cyrl.txt +218 -0
  301. data/lib/data/udhr_txt/udhr_srp_latn.txt +218 -0
  302. data/lib/data/udhr_txt/udhr_srr.txt +219 -0
  303. data/lib/data/udhr_txt/udhr_ssw.txt +228 -0
  304. data/lib/data/udhr_txt/udhr_suk.txt +218 -0
  305. data/lib/data/udhr_txt/udhr_sun.txt +227 -0
  306. data/lib/data/udhr_txt/udhr_sus.txt +218 -0
  307. data/lib/data/udhr_txt/udhr_swe.txt +224 -0
  308. data/lib/data/udhr_txt/udhr_swh.txt +221 -0
  309. data/lib/data/udhr_txt/udhr_tah.txt +217 -0
  310. data/lib/data/udhr_txt/udhr_taj.txt +10 -0
  311. data/lib/data/udhr_txt/udhr_tam.txt +227 -0
  312. data/lib/data/udhr_txt/udhr_tat.txt +219 -0
  313. data/lib/data/udhr_txt/udhr_tbz.txt +219 -0
  314. data/lib/data/udhr_txt/udhr_tca.txt +219 -0
  315. data/lib/data/udhr_txt/udhr_tem.txt +216 -0
  316. data/lib/data/udhr_txt/udhr_tet.txt +219 -0
  317. data/lib/data/udhr_txt/udhr_tgk.txt +217 -0
  318. data/lib/data/udhr_txt/udhr_tgl.txt +224 -0
  319. data/lib/data/udhr_txt/udhr_tgl_tglg.txt +9 -0
  320. data/lib/data/udhr_txt/udhr_tha.txt +217 -0
  321. data/lib/data/udhr_txt/udhr_tir.txt +217 -0
  322. data/lib/data/udhr_txt/udhr_tiv.txt +232 -0
  323. data/lib/data/udhr_txt/udhr_tob.txt +218 -0
  324. data/lib/data/udhr_txt/udhr_toi.txt +216 -0
  325. data/lib/data/udhr_txt/udhr_toj.txt +219 -0
  326. data/lib/data/udhr_txt/udhr_ton.txt +221 -0
  327. data/lib/data/udhr_txt/udhr_top.txt +220 -0
  328. data/lib/data/udhr_txt/udhr_tpi.txt +219 -0
  329. data/lib/data/udhr_txt/udhr_tsn.txt +219 -0
  330. data/lib/data/udhr_txt/udhr_tso_MZ.txt +220 -0
  331. data/lib/data/udhr_txt/udhr_tsz.txt +218 -0
  332. data/lib/data/udhr_txt/udhr_tuk_cyrl.txt +216 -0
  333. data/lib/data/udhr_txt/udhr_tuk_latn.txt +221 -0
  334. data/lib/data/udhr_txt/udhr_tur.txt +219 -0
  335. data/lib/data/udhr_txt/udhr_tzc.txt +219 -0
  336. data/lib/data/udhr_txt/udhr_tzh.txt +218 -0
  337. data/lib/data/udhr_txt/udhr_tzm.txt +220 -0
  338. data/lib/data/udhr_txt/udhr_tzm_tfng.txt +9 -0
  339. data/lib/data/udhr_txt/udhr_uig_arab.txt +219 -0
  340. data/lib/data/udhr_txt/udhr_uig_latn.txt +219 -0
  341. data/lib/data/udhr_txt/udhr_ukr.txt +218 -0
  342. data/lib/data/udhr_txt/udhr_umb.txt +218 -0
  343. data/lib/data/udhr_txt/udhr_ura.txt +219 -0
  344. data/lib/data/udhr_txt/udhr_urd.txt +9 -0
  345. data/lib/data/udhr_txt/udhr_uzn_cyrl.txt +220 -0
  346. data/lib/data/udhr_txt/udhr_uzn_latn.txt +220 -0
  347. data/lib/data/udhr_txt/udhr_vai.txt +224 -0
  348. data/lib/data/udhr_txt/udhr_vie.txt +221 -0
  349. data/lib/data/udhr_txt/udhr_vmw.txt +220 -0
  350. data/lib/data/udhr_txt/udhr_war.txt +219 -0
  351. data/lib/data/udhr_txt/udhr_wln.txt +220 -0
  352. data/lib/data/udhr_txt/udhr_wol.txt +219 -0
  353. data/lib/data/udhr_txt/udhr_wwa.txt +109 -0
  354. data/lib/data/udhr_txt/udhr_xho.txt +219 -0
  355. data/lib/data/udhr_txt/udhr_xsm.txt +219 -0
  356. data/lib/data/udhr_txt/udhr_yad.txt +220 -0
  357. data/lib/data/udhr_txt/udhr_yao.txt +214 -0
  358. data/lib/data/udhr_txt/udhr_yap.txt +220 -0
  359. data/lib/data/udhr_txt/udhr_ydd.txt +223 -0
  360. data/lib/data/udhr_txt/udhr_ykg.txt +211 -0
  361. data/lib/data/udhr_txt/udhr_yor.txt +218 -0
  362. data/lib/data/udhr_txt/udhr_yua.txt +218 -0
  363. data/lib/data/udhr_txt/udhr_zam.txt +223 -0
  364. data/lib/data/udhr_txt/udhr_ztu.txt +219 -0
  365. data/lib/data/udhr_txt/udhr_zul.txt +219 -0
  366. data/lib/profiles/profile_deu_1996.yml +25362 -0
  367. data/lib/profiles/profile_eng.yml +20794 -0
  368. data/lib/profiles/profile_fra.yml +24964 -0
  369. data/lib/profiles/profile_spa.yml +23020 -0
  370. data/test/babel_test.rb +44 -0
  371. data/test/profile_test.rb +105 -0
  372. data/test/string_extensions_test.rb +43 -0
  373. data/test/test_helper.rb +10 -0
  374. data/test/train.rb +26 -0
  375. metadata +440 -0
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Simplificator GmbH
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.markdown ADDED
@@ -0,0 +1,19 @@
1
+ #babel
2
+
3
+ Babel is a gem to identify in what language a text is written.
4
+ It is based on the n-gram approach by Cavnar and Trenkle as described in http://www.sfs.uni-tuebingen.de/iscl/Theses/kranig.pdf
5
+
6
+
7
+ ##usage
8
+ require 'rubygems'
9
+ require 'simplificator-babel'
10
+
11
+ # Train babel: feed it some texts
12
+ 'An english text to train and learn'.language= 'en'
13
+ 'Ein deutscher Text'.language= 'de'
14
+
15
+ puts 'Noch ein deutscher Text'.language # ask babel which language a text is written in
16
+
17
+ ##Copyright
18
+
19
+ Copyright (c) 2009 Simplificator GmbH. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,56 @@
1
# Build, test, coverage and documentation tasks for the babel gem.
require 'rubygems'
require 'rake'
require 'yaml' # YAML is used by the rdoc task below to read VERSION.yml

# Gem packaging tasks. Optional: they are only defined when jeweler is installed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "babel"
    gem.summary = %Q{Utility to guess the language of a text}
    gem.email = "info@simplificator.com"
    gem.homepage = "http://github.com/simplificator/babel"
    gem.authors = ["simplificator"]
    gem.add_dependency('ya2yaml', '>= 0.2.6')
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# rake test: runs every test/**/*_test.rb with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# rake rcov: coverage run. When rcov is missing, a stub task explains how to install it.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# rake rdoc: API documentation.
# Newer Rake releases no longer ship 'rake/rdoctask'; the task class lives in
# RDoc itself ('rdoc/task'). Prefer that one and fall back to the legacy path.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # The title carries the version from VERSION.yml when that file exists.
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "babel #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
56
+
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :patch: 4
3
+ :major: 0
4
+ :minor: 0
@@ -0,0 +1,54 @@
1
# Language guesser. Keeps one n-gram Profile per language and compares the
# profile of an unknown text against all of them.
module Babel
  # Known profiles, keyed by language name (String).
  @profiles = {}
  # Directory holding the profile_<lang>.yml files that ship with the gem.
  PROFILE_DIR = File.join(File.dirname(__FILE__), '..', 'profiles')

  # Train the profile of +lang+ with +text+. The profile is created on first
  # use. +options+ are handed through to Profile#learn.
  def self.learn(lang, text, options = {})
    profile = @profiles[lang.to_s] ||= Profile.new
    profile.learn(text, options)
  end

  # Forget every profile learned or loaded so far.
  def self.clear_profiles
    @profiles = {}
  end

  # Guess the language of +source+: the language whose profile has the
  # smallest distance to the text. When several languages share the smallest
  # distance the first one wins. Returns nil when no profile is known.
  # +options+ is accepted for symmetry with learn but is currently unused.
  def self.guess(source, options = {})
    closest = nil
    distances(source).each do |candidate|
      closest = candidate if closest.nil? || candidate.last < closest.last
    end
    closest && closest.first
  end

  # An array of [language, distance] pairs, one per known profile.
  def self.distances(text)
    source = Profile.new.learn(text)
    @profiles.map { |lang, target| [lang, source.distance(target)] }
  end

  # NOTE: the helpers below used to follow a bare `private`, which has no
  # effect on methods defined with `def self.`. They have always been callable
  # from outside and are deliberately left public so existing callers keep
  # working.

  # Path of the profile file for +lang+ inside +dir+.
  def self.file_name(dir, lang)
    File.join(dir, "profile_#{lang}.yml")
  end

  # Load every profile_<lang>.yml from options[:directory] (default:
  # PROFILE_DIR) and register it under <lang>. Already known profiles for
  # other languages are kept.
  def self.load_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    Dir[File.join(dir, 'profile_*.yml')].each do |file|
      # Match on the file name only, so a "profile_" somewhere in the
      # directory path cannot leak into the language name.
      next unless File.basename(file) =~ /\Aprofile_(.+)\.yml\z/
      # NOTE(review): the profile files contain serialized Ruby objects.
      # Only load profiles from trusted directories; YAML deserialization of
      # untrusted files is unsafe.
      @profiles[$1] = YAML.load_file(file)
    end
  end

  # Write every known profile to options[:directory] (default: PROFILE_DIR).
  # When options has a :limit key each profile is limited first
  # (see Profile#limit).
  def self.save_profiles(options = {})
    dir = options[:directory] || PROFILE_DIR
    @profiles.each do |lang, profile|
      profile.limit(options[:limit]) if options.has_key?(:limit)
      File.open(file_name(dir, lang), 'wb') do |file|
        file.write(profile.ya2yaml)
      end
    end
  end
end
54
+
@@ -0,0 +1,94 @@
1
module Babel
  # N-gram statistics of a language (or of a single text).
  #
  # @profile maps each n-gram to a two element array:
  #   [number of occurrences, relative frequency]
  # The second element is 0 until rank has been called.
  class Profile
    def initialize
      @profile = {}
      @total_occurences = 0
    end

    # Split +text+ into words, count the n-grams of every word and re-rank.
    # +options+ are passed to String#ngrams; the defaults are n-grams of
    # length 2 to 5 with padding.
    # Returns self so learn calls can be chained:
    #   profile.learn('asasas').learn('asdsad')
    def learn(text, options = {})
      options = {:min_length => 2, :max_length => 5, :pad => true}.merge(options)
      clean(text).split(' ').each do |word|
        word.ngrams(options).each { |ngram| occured(ngram) }
      end
      rank
      self
    end

    # Hook for normalising text before it is learned.
    # TODO: needed? Cleaning (stripping punctuation and digits) is currently
    # disabled, the text is returned unchanged.
    def clean(text)
      text
    end

    # Limit this profile to the +boundary+ most frequent n-grams.
    # N-grams with equal counts are ordered alphabetically so the result is
    # deterministic.
    # The profile needs to be ranked first; raises RuntimeError otherwise.
    # Do not use this if you plan to extend the profile later on.
    # Returns the profile hash when entries were removed, nil otherwise.
    def limit(boundary = 100)
      @profile.each_value do |stats|
        raise 'Please call rank() first' if stats.last == 0
      end

      by_frequency = @profile.keys.sort_by { |ngram| [-@profile[ngram].first, ngram] }
      keep = {}
      by_frequency.first([boundary, 0].max).each { |ngram| keep[ngram] = true }
      @profile.reject! { |ngram, _stats| !keep.has_key?(ngram) }
    end

    # Rank the current profile: store for every n-gram its relative frequency
    # (occurrences of the n-gram / occurrences of all n-grams). This value is
    # what ranking returns and what distance compares.
    def rank
      total = @total_occurences.to_f
      @profile.values.each do |stats|
        stats[1] = stats[0] / total
      end
    end

    # Called when a ngram occurred. Optionally you can pass an
    # amount (how many times the ngram occurred).
    def occured(ngram, amount = 1)
      (@profile[ngram] ||= [0, 0])[0] += amount
      @total_occurences += amount
    end

    # Find the occurrence count of a ngram. If it never occurred, returns 0.
    def occurence(ngram)
      @profile[ngram] ? @profile[ngram].first : 0
    end

    # Find the ranking (relative frequency) of a ngram. If it is unknown or
    # not yet ranked, returns 0.
    def ranking(ngram)
      @profile[ngram] ? @profile[ngram].last : 0
    end

    # Calculate the distance to another profile: for every n-gram of this
    # profile add the absolute difference of the two rankings, or 1 when the
    # other profile has no ranking for it. An empty profile has distance 0.
    def distance(other)
      @profile.inject(0) do |sum, (ngram, stats)|
        other_ranking = other.ranking(ngram)
        if other_ranking == 0
          sum + 1
        else
          sum + (other_ranking - stats.last).abs
        end
      end
    end


    def to_s
      @profile.inspect
    end
  end
end
@@ -0,0 +1,42 @@
1
# Babel extensions to String: n-gram extraction plus shortcuts for guessing
# and teaching the language of a text.
class String

  # All n-grams (substrings) of this string, ordered by start position and
  # then by length.
  #
  # Options:
  #   :min_length    - shortest n-gram to return (default 1)
  #   :max_length    - longest n-gram to return (default: length of the string)
  #   :pad           - when true, surround the string with '_' before
  #                    extracting: one in front, (length - 1) behind
  #   :preserve_case - when true, do not downcase the string first
  #
  # An empty string has no n-grams, with or without padding.
  def ngrams(options = {})
    min_length = options[:min_length] || 1
    max_length = options[:max_length] || self.length
    pad = options[:pad] || false
    # Guard: padding an empty string used to raise ('_' * -1).
    return [] if empty?

    value = options[:preserve_case] ? self : self.downcase
    value = "_#{value}#{'_' * (value.length - 1)}" if pad

    shortest = [min_length, 1].max
    res = []
    0.upto(value.length - 1) do |index|
      # never read past the end of the string
      longest = [max_length, value.length - index].min
      shortest.upto(longest) do |len|
        res << value[index, len]
      end
    end
    res
  end

  # def byte_grams(options = {})
  #   min_length = options[:min_length] || 1
  #   max_length = options[:max_length] || self.length
  #   value = options[:preserve_case] ? self : self.downcase
  #   res = []
  #
  # end


  # Ask Babel about the language of this text
  # Can return nil if no language found
  def language(options = {})
    Babel.guess(self, options)
  end

  # Tell Babel that this text is in a given language.
  # With assignment syntax (text.language = 'en') only +lang+ can be given;
  # +options+ can only be passed through an explicit send.
  def language=(lang, options = {})
    Babel.learn(lang, self, options)
  end
end
data/lib/babel.rb ADDED
@@ -0,0 +1,10 @@
1
+ if RUBY_VERSION < '1.9'
2
+ require 'jcode'
3
+ $KCODE = 'u'
4
+ end
5
+
6
+ require File.dirname(__FILE__) + '/babel/string_extensions'
7
+ require File.dirname(__FILE__) + '/babel/babel'
8
+ require File.dirname(__FILE__) + '/babel/profile'
9
+
10
+ require 'ya2yaml'