interscript 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. checksums.yaml +4 -4
  2. data/lib/interscript.rb +10 -6
  3. data/lib/interscript/fs.rb +0 -2
  4. data/lib/interscript/mapping.rb +1 -1
  5. data/lib/interscript/opal.rb +38 -8
  6. data/lib/interscript/opal/entrypoint.rb +12 -0
  7. data/lib/interscript/opal/map_translate.rb +7 -0
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  15. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
  16. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
  17. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  18. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  19. data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
  20. data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
  21. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  22. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  23. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  24. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  25. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
  26. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  27. data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
  28. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  29. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  30. data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
  31. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  32. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
  33. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  34. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  35. data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
  36. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  37. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  38. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  39. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  40. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
  41. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
  42. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  43. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  44. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  45. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  46. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
  47. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  48. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  49. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  50. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  51. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  52. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
  53. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  54. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
  55. data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
  56. data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
  57. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  58. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  59. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
  60. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
  61. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  62. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
  63. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
  64. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
  65. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
  66. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  67. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  68. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
  69. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
  70. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
  71. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  72. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  73. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
  75. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
  76. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
  77. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  78. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  79. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  80. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
  81. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  82. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  83. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  84. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  85. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  86. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  87. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  88. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  89. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
  90. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  91. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  92. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  93. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
  94. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
  95. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
  96. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
  97. data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
  98. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
  99. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
  100. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
  101. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
  102. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
  103. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
  104. data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
  105. data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
  106. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  107. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  108. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  109. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  110. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  111. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  112. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  113. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  114. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  115. data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
  116. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
  117. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
  118. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
  119. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
  120. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
  121. data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
  122. data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
  123. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
  124. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  125. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  126. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
  127. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
  128. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
  129. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  130. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  131. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
  132. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  133. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
  134. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  135. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  136. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  137. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
  138. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
  139. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  140. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  141. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  142. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  143. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  144. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  145. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  146. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  147. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  148. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  149. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  150. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  151. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  152. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  153. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  154. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  155. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  156. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  157. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  158. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
  159. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  160. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  161. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  162. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  163. data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
  164. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
  165. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  166. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  167. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  168. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  169. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  170. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
  171. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
  172. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
  173. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
  174. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  175. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  176. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  177. data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
  178. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  179. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  180. data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
  181. data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
  182. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
  183. data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
  184. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  185. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
  186. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  200. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  201. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  202. data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
  203. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  204. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  205. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  206. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  207. data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
  208. data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
  209. data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
  210. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  211. data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
  212. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  213. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
  214. data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
  215. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
  216. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  217. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  218. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  219. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  220. data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
  221. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  222. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  223. data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
  224. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  225. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  226. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  227. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  228. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
  229. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  230. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  231. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
  232. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  233. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  234. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  235. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  236. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  237. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  238. data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
  239. data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
  240. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
  241. data/spec/interscript/filenames_spec.rb +384 -0
  242. data/spec/interscript_spec.rb +7 -4
  243. metadata +105 -26
  244. data/bin/interscript +0 -41
  245. data/bin/rspec +0 -29
  246. data/bin/setup +0 -8
  247. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  248. data/lib/interscript-opal.rb +0 -2
  249. data/lib/interscript/opal_map_translate.rb +0 -12
  250. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  251. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7557ea1ca381562c61be7dbbeab2ea4adc42ef57ce857ef86acd62a08e5ce588
4
- data.tar.gz: 5099d4a7bf07817155d620db716af452607b8565f4e08d37aa805f486556e0e1
3
+ metadata.gz: 82fae2b248d9c86139b7f188da2ac72699696c9768e54a4510a6f1af2b933dc9
4
+ data.tar.gz: 3e2cc24b8d33f5a8ed0f8b475e4d109049439221274ecda9ee1b9c7743896e07
5
5
  SHA512:
6
- metadata.gz: f8a738a34aba269c0a01b4d123f01cc423a3c296541f302f65a6043bd2b618ef2c702f32d469866ee7c488624985b45ed2eee2ad9bbe2955f230b5b45472c364
7
- data.tar.gz: a3a66af7fcb9d8c82bcf927b17ea68686298bccec2f6e8dfac25796e7013b36f68fa9f130bd2fb9e7bc214a528896cafdd41a2f464886f09c504f376c19065d8
6
+ metadata.gz: 5f9925a97d17f0433446a898f63d18869a73e92f9975d8259c916dae242fc5b15ff93dd6d3c28ca2ff5bcbda29489d4ea59af26d4e16f2e1416d701354a6f6e2
7
+ data.tar.gz: 77321c4a1001cabda8cc057950682037b637176fd05637ce78a6ea698d8c55e238e6bbcc850dda97828c719afbdae1192a8e7a93fd2307a9d52196342fb015f5
@@ -90,7 +90,8 @@ module Interscript
90
90
  end
91
91
 
92
92
  charmap.each do |k, v|
93
- while (match = output&.match(/#{k}/))
93
+ re = mkregexp(k)
94
+ while (match = output&.match(re))
94
95
  pos = match.offset(0).first
95
96
  result = !downcase && up_case_around?(output, pos) ? v.upcase : v
96
97
 
@@ -118,12 +119,15 @@ module Interscript
118
119
 
119
120
  return unless output
120
121
 
121
- output = output.sub(/^(.)/, &:upcase) if title_case
122
+ re = mkregexp('^(.)')
123
+ output = output.gsub(re, &:upcase) if title_case
122
124
  if word_separator != ''
123
- output = output.gsub(/#{word_separator}#{separator}/u, word_separator)
125
+ re = mkregexp("#{word_separator}#{separator}")
126
+ output = output.gsub(re, word_separator)
124
127
 
125
128
  if title_case
126
- output = output.gsub(/#{word_separator}(.)/u, &:upcase)
129
+ re = mkregexp("#{word_separator}(.)")
130
+ output = output.gsub(re, &:upcase)
127
131
  end
128
132
  end
129
133
 
@@ -140,11 +144,11 @@ module Interscript
140
144
  return false if string[pos] == string[pos].downcase
141
145
 
142
146
  i = pos - 1
143
- i -= 1 while i.positive? && string[i] !~ Regexp.new(ALPHA_REGEXP)
147
+ i -= 1 while i.positive? && string[i] !~ mkregexp('[[:alpha:]]')
144
148
  before = i >= 0 && i < pos ? string[i].to_s.strip : ''
145
149
 
146
150
  i = pos + 1
147
- i += 1 while i < string.size - 1 && string[i] !~ Regexp.new(ALPHA_REGEXP)
151
+ i += 1 while i < string.size - 1 && string[i] !~ mkregexp('[[:alpha:]]')
148
152
  after = i > pos ? string[i].to_s.strip : ''
149
153
 
150
154
  before_uc = !before.empty? && before == before.upcase
@@ -2,8 +2,6 @@ require 'pathname'
2
2
 
3
3
  module Interscript
4
4
  module Fs
5
- ALPHA_REGEXP = '[[:alpha:]]'
6
-
7
5
  def sub_replace(string, pos, size, repl)
8
6
  string[pos..pos + size - 1] = repl
9
7
  string
@@ -122,7 +122,7 @@ module Interscript
122
122
 
123
123
  inherited_mapping = Mapping.for(inherit_system, depth: depth + 1)
124
124
 
125
- @rules = [inherited_mapping.rules, rules].flatten
125
+ @rules = [rules, inherited_mapping.rules].flatten
126
126
  @postrules = [inherited_mapping.postrules, postrules].flatten
127
127
  @characters = (inherited_mapping.characters|| {}).merge(characters)
128
128
  @dictionary = (inherited_mapping.dictionary|| {}).merge(dictionary)
@@ -1,14 +1,34 @@
1
+ require "onigmo"
2
+ require "onigmo/core_ext"
3
+
4
+ # Increase this if there are out-of-memory errors. This setting is
5
+ # tested to be big enough to handle all the maps provided.
6
+ Onigmo::FFI.library.memory.grow(128)
7
+
1
8
  module Interscript
2
9
  module Opal
3
- ALPHA_REGEXP = '\p{L}'
4
-
5
10
  def mkregexp(regexpstring)
6
- flags = 'u'
7
- if regexpstring.include? "(?i)"
8
- regexpstring = regexpstring.gsub("(?i)", "").gsub("(?-i)", "")
9
- flags = 'ui'
11
+ @cache ||= {}
12
+ if s = @cache[regexpstring]
13
+ s
14
+ else
15
+ # JS regexp is more performant than Onigmo. Let's use the JS
16
+ # regexp wherever possible, but use Onigmo where we must.
17
+ # Let's allow those characters to happen for the regexp to be
18
+ # considered compatible: ()|.*+?{} ** BUT NOT (? **.
19
+ if /[\\$^\[\]]|\(\?/.match?(regexpstring)
20
+ # Ruby caches its regexps internally. We can't GC. We could
21
+ # think about freeing them, but we really can't, because they
22
+ # may be in use.
23
+
24
+ # Uncomment those to keep track of Onigmo/JS regexp compilation.
25
+ # print '#'
26
+ @cache[regexpstring] = Onigmo::Regexp.new(regexpstring)
27
+ else
28
+ # print '.'
29
+ @cache[regexpstring] = Regexp.new(regexpstring)
30
+ end
10
31
  end
11
- Regexp.new("/#{regexpstring}/#{flags}")
12
32
  end
13
33
 
14
34
  def sub_replace(string, pos, size, repl)
@@ -19,9 +39,19 @@ module Interscript
19
39
  string
20
40
  end
21
41
 
42
+ # name is unused
22
43
  def load_map_json(name, json)
23
- `Opal.global.InterscriptMaps[#{name}] = #{json}`
44
+ JSON.load(json).each do |k,v|
45
+ `Opal.global.InterscriptMaps[#{k}] = #{JSON.dump(v)}`
46
+ end
24
47
  end
25
48
 
26
49
  end
27
50
  end
51
+
52
+ class String
53
+ # Opal has a wrong implementation of String#unicode_normalize
54
+ def unicode_normalize
55
+ self.JS.normalize
56
+ end
57
+ end
@@ -0,0 +1,12 @@
1
+ require "opal"
2
+ require "onigmo/onigmo-wasm"
3
+
4
+ module Interscript
5
+ def self.on_load(&block)
6
+ WebAssembly.wait_for("onigmo/onigmo-wasm", &block)
7
+ end
8
+ end
9
+
10
+ Interscript.on_load do
11
+ require "interscript"
12
+ end
@@ -0,0 +1,7 @@
1
+ module Interscript
2
+ module OpalMapTranslate
3
+ def self.translate_regexp(src)
4
+ src
5
+ end
6
+ end
7
+ end
@@ -1,3 +1,3 @@
1
1
  module Interscript
2
- VERSION = "0.1.6"
2
+ VERSION = "0.1.7"
3
3
  end
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: acadsin
3
3
  id: 2002
4
- language: zho
4
+ language: iso-639-2:zho
5
5
  source_script: Hani
6
6
  destination_script: Latn
7
7
  name: Chinese Tongyong Pinyin Academica Sinica 2002 System
8
+ alias:
9
+ ogc11122:
10
+ code: zho_Hani2Latn_AcadSin_2002
11
+ description: Chinese Tongyong Pinyin Academica Sinica 2002 System
8
12
  url:
9
13
  description: Chinese Tongyong Pinyin Academica Sinica 2002 System
10
14
 
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: amh
4
+ language: iso-639-2:amh
5
5
  source_script: Ethi
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Amharic (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: amh_Ethi2Latn_ALA_1997
11
+ description: Amharic ALA-Library of Congress 1997 System
8
12
  url: http://catdir.loc.gov/catdir/cpso/romanization/amharic.pdf
9
13
  creation_date: 1997
10
14
  description: |
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 2011
4
- language: amh
4
+ language: iso-639-2:amh
5
5
  source_script: Ethi
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Amharic (2011)
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: ara
4
+ language: iso-639-2:ara
5
5
  source_script: Arab
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Arabic (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: ara_Arab2Latn_ALA_1997
11
+ description: Arabic ALA-Library of Congress 1997 System
8
12
  url: http://catdir.loc.gov/catdir/cpso/romanization/arabic.pdf
9
13
  creation_date: 1997
10
14
  description: |
@@ -1,11 +1,15 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: asm
4
+ language: iso-639-2:asm
5
5
  source_script: Deva
6
6
  destination_script: Latn
7
7
  name: Assamese Romanization, 1997
8
- url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
8
+ alias:
9
+ ogc11122:
10
+ code: asm_Deva2Latn_ALA_1997
11
+ description: Assamese ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/assamese.pdf
9
13
  creation_date: 1997
10
14
  description: |
11
15
  ALA-LC Romanization table for Assamese
@@ -47,7 +51,7 @@ tests:
47
51
  - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
48
52
  expected: "kamaumabaāira maeẏarara daehata kaobhaiḍa pajaiṭaibha"
49
53
  - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
50
- expected: "ṭauiṭaāraযogae khaoda sadaraī karae ei kathaā"
54
+ expected: "ṭauiṭaāraẏaogae khaoda sadaraī karae ei kathaā"
51
55
  - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
52
56
  expected: "lakhaimapaura jailaāra naāraāẏaṇapaurara barapathaārata ājai paraśaānatai dhaāma naāmaerae ekhana baṛdadhaāśaramara śaubhaāramabha karaā haẏa"
53
57
 
@@ -119,6 +123,7 @@ map:
119
123
 
120
124
  #Semivowels
121
125
  'য়': 'ya'
126
+ 'য': 'ẏa'
122
127
  'য়': 'ẏa'
123
128
  'ৰ': 'ra'
124
129
  'ল': 'la'
@@ -156,4 +161,5 @@ map:
156
161
  '\u09c8': 'ai'
157
162
  '\u09cb': 'o'
158
163
  '\u09cc': 'au'
164
+ '।': '.'
159
165
  '\u09CD': '' # Used for joining
@@ -0,0 +1,40 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: iso-639-2:asm
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Assamese Romanization, 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Assamese
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
25
+ birāma.
26
+
27
+ - Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
28
+ labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
29
+
30
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
31
+
32
+ tests:
33
+ - source: "ৰাজ্যিক স্বাস্থ্য মন্ত্ৰী পীয়ুষ হাজৰিকাৰ বিৰুদ্ধে দাখিল কৰা হৈছে এজাহাৰ।"
34
+ expected: "raājaẏaika sabaāsathaẏa manataraī paīyausha haājaraikaāra bairaudadhae daākhaila karaā haaichae ejaāhaāra."
35
+ - source: "কোৰোনা মহামাৰীৰ এই সময়ত সভাখনত হাজাৰ হাজাৰ লোকে মাস্ক পৰিধান নকৰাৰ লগতে সামাজিক দূৰত্ব নমনাৰ অভিযোগ উত্থাপন কৰা হৈছে"
36
+ expected: "kaoraonaā mahaāmaāraīra ei samayata sabhaākhanata haājaāra haājaāra laokae maāsaka paraidhaāna nakaraāra lagatae saāmaājaika daūrataba namanaāra abhaiẏaoga utathaāpana karaā haaichae"
37
+
38
+ map:
39
+
40
+ inherit: "alalc-asm-Deva-Latn-1997"
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: aze
4
+ language: iso-639-2:aze
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Azerbaijani (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: aze_Cyrl2Latn_ALA_1997
11
+ description: Azerbaijani ALA-Library of Congress 1997 System
8
12
  url: https://transliteration.eki.ee/pdf/Azerbaijani.pdf
9
13
  creation_date: 1997
10
14
  description: |
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: bel
4
+ language: iso-639-2:bel
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Byelorussian (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: bel_Cyrl2Latn_ALA_1997
11
+ description: Byelorussian ALA-Library of Congress 1997 System
8
12
  url: http://catdir.loc.gov/catdir/cpso/romanization/beloruss.pdf
9
13
  creation_date: 1997
10
14
 
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 2017
4
- language: ben
4
+ language: iso-639-2:ben
5
5
  source_script: Beng
6
6
  destination_script: Latn
7
7
  name: Bengali Romanization, 2017
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: bul
4
+ language: iso-639-2:bul
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization Table -- Bulgarian (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: bul_Cyrl2Latn_ALA_1997
11
+ description: Bulgarian ALA-Library of Congress Bulgarian 1997 System
8
12
  url: http://www.rechtertie.nl/databases/judd/downloads/Bulgarian.pdf
9
13
  creation_date: 1997
10
14
 
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: ell
4
+ language: iso-639-2:ell
5
5
  source_script: Grek
6
6
  destination_script: Latn
7
7
  name: Greek Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: ell_Grek2Latn_ALA_1997
11
+ description: Greek ALA-Library of Congress 1997 System
8
12
  url: http://catdir.loc.gov/catdir/cpso/romanization/greek.pdf
9
13
  creation_date: 1997
10
14
  description: |
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 2010
4
- language: ell
4
+ language: iso-639-2:ell
5
5
  source_script: Grek
6
6
  destination_script: Latn
7
7
  name: Greek Romanization, 2010
@@ -27,7 +27,6 @@ tests:
27
27
 
28
28
  Giannēs Makrygiannēs.
29
29
 
30
-
31
30
  - source: ΑΘΗΝΑ
32
31
  expected: ATHĒNA
33
32
  - source: μπαμπάκι
@@ -0,0 +1,266 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:guj
5
+ source_script: Gujr
6
+ destination_script: Latn
7
+ name: Gujarati Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: guj_Gujr2Latn_ALA_1997
11
+ description: Gujarati ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/gujarati.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Gujarati
16
+
17
+ notes:
18
+
19
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
20
+ vowels following a consonant can be found in grammars; no distinction between the two is
21
+ made in transliteration.
22
+
23
+ - |
24
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
25
+ transliteration, with the following exceptions:
26
+ a) when another vowel is indicated by its appropriate sign and
27
+ b) when the absence of any vowel is indicated by the subscript symbol ( ્ ) called halanta or
28
+ virāma.
29
+
30
+ - |
31
+ Exception: Anusvāra is transliterated by:
32
+ a) ṅ before gutturals,
33
+ b) ñ before palatals,
34
+ c) ṇ before cerebrals,
35
+ d) n before dentals, and
36
+ e) m before labials.
37
+
38
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
39
+
40
+ tests:
41
+ - source: "અમિત શાહનો કોરોના રિપોર્ટ ૨ ઓગસ્ટે પોઝિટિવ આવ્યો હતો, ત્યારથી તેમનું સ્વાસ્થ્ય સારું નથી"
42
+ expected: "amita śāhanȏ kȏrȏnā ripȏrṭa 2 ȏgasṭȇ pȏjhiṭiva āvyȏ hatȏ, tyārathī tȇmanuṃ svāsthya sāruṃ nathī"
43
+ - source: "મેદાંતા હોસ્પિટલમાં તેમનો ઇલાજ ચાલી રહ્યો હતો"
44
+ expected: "mȇdāntā hȏspiṭalamāṃ tȇmanȏ ilāja cālī rahyȏ hatȏ"
45
+ - source: "ભારતના વિશ્વનાથન આનંદે શેનયાનમાં પહેલો ફિડે શતરંજ વિશ્વ કપ જીત્યો"
46
+ expected: "bhāratanā viśvanāthana ānandȇ śȇnayānamāṃ pahȇlȏ phiḍȇ śatarañja viśva kapa jītyȏ"
47
+ - source: "ભારતીય વડા પ્રધાન જવાહરલાલ નેહરુએ ૪૦ લાખ હિન્દુઓ અને મુસલમાનોના પારસ્પરિક સ્થાનાંતરણનું સૂચન આપ્યું"
48
+ expected: "bhāratīya vaḍā pradhāna javāharalāla nȇharuȇ 40 lākha hinduȏ anȇ musalamānȏnā pārasparika sthānāntaraṇanuṃ sūcana āpyuṃ"
49
+ - source: "લિબિયાના એલ અજિજિયામાં ધરતી પર સૌથી વધુ તાપમાન નોંધાયું. એ વખતે છાયામાં નોંધવામાં આવેલું તાપમાન ૫૮ ડિગ્રી સેલ્સિયસ હતું."
50
+ expected: "libiyānā ȇla ajijiyāmāṃ dharatī para sauthī vadhu tāpamāna nȏndhāyuṃ. ȇ vakhatȇ chāyāmāṃ nȏndhavāmāṃ āvȇluṃ tāpamāna 58 ḍigrī sȇlsiyasa hatuṃ."
51
+ - source: "પ્રથમ વિશ્વયુદ્ધઃ જર્મની અને ફ્રાન્સ વચ્ચે એસ્નેની લડાઈ શરૂ થઈ હતી"
52
+ expected: "prathama viśvayuddhaḥ jarmanī anȇ phrānsa vaccȇ ȇsnȇnī laḍāī śarū thaī hatī"
53
+ - source: "એન્ગ્લો-મિસ્ત્ર યુદ્ધઃ તેલ અલ કેબિરનું યુદ્ધ લડવામાં આવ્યું હતું."
54
+ expected: "ȇnglȏ-mistra yuddhaḥ tȇla ala kȇbiranuṃ yuddha laḍavāmāṃ āvyuṃ hatuṃ."
55
+ - source: "પુરાવા ન હતા, એ જ કારણે કેસ ચાલ્યો નહીં, પણ તેમને નજરકેદ રાખવામાં આવ્યા"
56
+ expected: "purāvā na hatā, ȇ ja kāraṇȇ kȇsa cālyȏ nahīṃ, paṇa tȇmanȇ najarakȇda rākhavāmāṃ āvyā"
57
+ - source: "સરદાર પટેલે નક્કી કર્યું હતું કે કાશ્મીર ભારતનો હિસ્સો બનશે; ૯૧ વર્ષ પહેલાં લાહોર જેલમાં ભૂખહડતાળ દરમિયાન શહીદ થયા હતા જતીન દાસ"
58
+ expected: "saradāra paṭȇlȇ nakkī karyuṃ hatuṃ kȇ kāśmīra bhāratanȏ hissȏ banaśȇ; 91 varsha pahȇlāṃ lāhȏra jȇlamāṃ bhūkhahaḍatāḷa daramiyāna śahīda thayā hatā jatīna dāsa"
59
+ - source: "કોરોના પ્રોટોકોલ વચ્ચે આજે મેડિકલ પ્રવેશ પરીક્ષા લેવાશેઃ એન્ટ્રી ટચ ફ્રી રહેશે, એડમિટ કાર્ડ બાર કોડથી ચેક થશે"
60
+ expected: "kȏrȏnā prȏṭȏkȏla vaccȇ ājȇ mȇḍikala pravȇśa parīkshā lȇvāśȇḥ ȇnṭrī ṭaca phrī rahȇśȇ, ȇḍamiṭa kārḍa bāra kȏḍathī cȇka thaśȇ"
61
+ - source: "અલ્ ક઼`ઇદ્ માં હવામાન"
62
+ expected: "al ka`id māṃ havāmāna"
63
+ - source: "મંત્રાલય તથા ખ઼.ય ના વિ૨ષ્ઠ અધિકા૨ીઓ ઉપસ્થિત ૨હ્યા હતા"
64
+ expected: "mantrālaya tathā kha.ya nā vi2shṭha adhikā2īȏ upasthita 2hyā hatā"
65
+
66
+
67
+ map:
68
+
69
+ rules:
70
+ # note 3
71
+ - pattern: \u0A82(?=[કખગઘઙ])
72
+ result: ṅ
73
+ - pattern: \u0A82(?=[ચછજઝઞ])
74
+ result: ñ
75
+ - pattern: \u0A82(?=[ટઠડઢણ])
76
+ result: ṇ
77
+ - pattern: \u0A82(?=[તથદધન])
78
+ result: n
79
+ - pattern: \u0A82(?=[પફબભમ])
80
+ result: m
81
+
82
+ # note[2(a,b)]
83
+ - pattern: ([ક]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
84
+ result: 'k'
85
+ - pattern: ([ખ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
86
+ result: 'kh'
87
+ - pattern: ([ગ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
88
+ result: 'g'
89
+ - pattern: ([ઘ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
90
+ result: 'gh'
91
+ - pattern: ([ઙ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
92
+ result: 'ṅ'
93
+ - pattern: ([ચ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
94
+ result: 'c'
95
+ - pattern: ([છ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
96
+ result: 'ch'
97
+ - pattern: ([જ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
98
+ result: 'j'
99
+ - pattern: ([ઝ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
100
+ result: 'jh'
101
+ - pattern: ([ઞ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
102
+ result: 'ñ'
103
+ - pattern: ([ટ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
104
+ result: 'ṭ'
105
+ - pattern: ([ઠ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
106
+ result: 'ṭh'
107
+ - pattern: ([ડ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
108
+ result: 'ḍ'
109
+ - pattern: ([ઢ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
110
+ result: 'ḍh'
111
+ - pattern: ([ણ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
112
+ result: 'ṇ'
113
+ - pattern: ([ત]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
114
+ result: 't'
115
+ - pattern: ([થ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
116
+ result: 'th'
117
+ - pattern: ([દ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
118
+ result: 'd'
119
+ - pattern: ([ધ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
120
+ result: 'dh'
121
+ - pattern: ([ન]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
122
+ result: 'n'
123
+ - pattern: ([પ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
124
+ result: 'p'
125
+ - pattern: ([ફ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
126
+ result: 'ph'
127
+ - pattern: ([બ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
128
+ result: 'b'
129
+ - pattern: ([ભ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
130
+ result: 'bh'
131
+ - pattern: ([મ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
132
+ result: 'm'
133
+ - pattern: ([ય]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
134
+ result: 'y'
135
+ - pattern: ([ર]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
136
+ result: 'r'
137
+ - pattern: ([લ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
138
+ result: 'l'
139
+ - pattern: ([ળ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
140
+ result: 'ḷ'
141
+ - pattern: ([વ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
142
+ result: 'v'
143
+ - pattern: ([શ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
144
+ result: 'ś'
145
+ - pattern: ([ષ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
146
+ result: 'sh'
147
+ - pattern: ([સ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
148
+ result: 's'
149
+ - pattern: ([હ]=?)(?=[\u0abe\u0abf\u0ac0\u0ac1\u0ac2\u0ac3\u0ac5\u0ac7\u0ac8\u0ac9\u0acb\u0acc\u0acd])
150
+ result: 'h'
151
+
152
+
153
+ characters:
154
+
155
+ 'અ': 'a'
156
+ 'આ': 'ā'
157
+ 'ઇ': 'i'
158
+ 'ઈ': 'ī'
159
+ 'ઉ': 'u'
160
+ 'ઊ': 'ū'
161
+ 'ઋ': 'ṛ'
162
+
163
+ 'ઍ': 'e'
164
+ 'એ': 'ȇ'
165
+ 'ઐ': 'ai'
166
+
167
+ 'ઑ': 'o'
168
+ 'ઓ': 'ȏ'
169
+ 'ઔ': 'au'
170
+
171
+ # II. Consonants (see Note 2)
172
+ # Gutturals
173
+ 'ક': 'ka'
174
+ 'ખ': 'kha'
175
+ 'ગ': 'ga'
176
+ 'ઘ': 'gha'
177
+ 'ઙ': 'ṅa'
178
+
179
+ # Palatals
180
+ 'ચ': 'ca'
181
+ 'છ': 'cha'
182
+ 'જ': 'ja'
183
+ 'ઝ': 'jha'
184
+ 'ઞ': 'ña'
185
+
186
+ # Cerebrals
187
+ 'ટ': 'ṭa'
188
+ 'ઠ': 'ṭha'
189
+ 'ડ': 'ḍa'
190
+ 'ઢ': 'ḍha'
191
+ 'ણ': 'ṇa'
192
+
193
+ # Dentals
194
+ 'ત': 'ta'
195
+ 'થ': 'tha'
196
+ 'દ': 'da'
197
+ 'ધ': 'dha'
198
+ 'ન': 'na'
199
+
200
+ # Labials
201
+ 'પ': 'pa'
202
+ 'ફ': 'pha'
203
+ 'બ': 'ba'
204
+ 'ભ': 'bha'
205
+ 'મ': 'ma'
206
+
207
+ # Semivowels
208
+ 'ય': 'ya'
209
+ 'ર': 'ra'
210
+ 'લ': 'la'
211
+ 'ળ': 'ḷa'
212
+ 'વ': 'va'
213
+
214
+ # Sibilants
215
+ 'શ': 'śa'
216
+ 'ષ': 'sha'
217
+ 'સ': 'sa'
218
+
219
+
220
+ # Aspirate
221
+ 'હ': 'ha'
222
+
223
+ # Bisarga
224
+ 'ઃ': 'ḥ'
225
+
226
+ # Anusvāra
227
+ 'ં': 'ṃ'
228
+
229
+ # Avagraha (see Note 4)
230
+ 'ઽ': '’' # (apostrophe)
231
+
232
+ # Medials # Needed for connecting consonants
233
+
234
+ 'ા': 'ā'
235
+ 'િ': 'i'
236
+ 'ી': 'ī'
237
+ 'ુ': 'u'
238
+ 'ૂ': 'ū'
239
+ 'ૃ': 'ṛ'
240
+ 'ૅ': 'e'
241
+ 'ે': 'ȇ'
242
+ 'ૈ': 'ai'
243
+ 'ૉ': 'o'
244
+ 'ો': 'ȏ'
245
+ 'ૌ': 'au'
246
+
247
+ # digits
248
+
249
+ '૦': '0'
250
+ '૧': '1'
251
+ '૨': '2'
252
+ '૩': '3'
253
+ '૪': '4'
254
+ '૫': '5'
255
+ '૬': '6'
256
+ '૭': '7'
257
+ '૮': '8'
258
+ '૯': '9'
259
+
260
+
261
+
262
+ '્': ''
263
+ '઼': ''
264
+ '।': '.'
265
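+ '\u09CD': '' # Used for joining. NOTE(review): U+09CD is the Bengali virama (the Gujarati virama is U+0ACD), and single-quoted YAML keeps the \u escape literal -- confirm this key is intended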
266
+ "‍": ''# Used for joining