interscript 0.1.6 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (251) hide show
  1. checksums.yaml +4 -4
  2. data/lib/interscript.rb +10 -6
  3. data/lib/interscript/fs.rb +0 -2
  4. data/lib/interscript/mapping.rb +1 -1
  5. data/lib/interscript/opal.rb +38 -8
  6. data/lib/interscript/opal/entrypoint.rb +12 -0
  7. data/lib/interscript/opal/map_translate.rb +7 -0
  8. data/lib/interscript/version.rb +1 -1
  9. data/maps/acadsin-zho-Hani-Latn-2002.yaml +5 -1
  10. data/maps/alalc-amh-Ethi-Latn-1997.yaml +5 -1
  11. data/maps/alalc-amh-Ethi-Latn-2011.yaml +1 -1
  12. data/maps/alalc-ara-Arab-Latn-1997.yaml +5 -1
  13. data/maps/alalc-asm-Deva-Latn-1997.yaml +9 -3
  14. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  15. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +5 -1
  16. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +5 -1
  17. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  18. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  19. data/maps/alalc-ell-Grek-Latn-1997.yaml +5 -1
  20. data/maps/alalc-ell-Grek-Latn-2010.yaml +1 -2
  21. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  22. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  23. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  24. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  25. data/maps/alalc-kat-Geok-Latn-1997.yaml +1 -1
  26. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  27. data/maps/alalc-kor-Hang-Latn-1997.yaml +5 -1
  28. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  29. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  30. data/maps/alalc-mar-Deva-Latn-1997.yaml +21 -2
  31. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  32. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +1 -1
  33. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  34. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  35. data/maps/{alalc-pan-Deva-Latn-1997.yaml → alalc-pan-Guru-Latn-1997.yaml} +23 -4
  36. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  37. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  38. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  39. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  40. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +5 -1
  41. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +1 -1
  42. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  43. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  44. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  45. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  46. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +1 -1
  47. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  48. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  49. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  50. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  51. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  52. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -1
  53. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  54. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -1
  55. data/maps/bgn-kor-Hang-Latn-1943.yaml +7 -3
  56. data/maps/bgn-kor-Kore-Latn-1943.yaml +3 -3
  57. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  58. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  59. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +5 -1
  60. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +5 -1
  61. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  62. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +2 -2
  63. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +6 -2
  64. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +5 -1
  65. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +5 -1
  66. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  67. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  68. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +5 -1
  69. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -1
  70. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -1
  71. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  72. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  73. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +1 -1
  75. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +1 -1
  76. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +1 -1
  77. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  78. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +1 -1
  79. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  80. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +31 -1
  81. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  82. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  83. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  84. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  85. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  86. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  87. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  88. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  89. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -1
  90. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  91. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  92. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  93. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +1 -1
  94. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +1 -1
  95. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +1 -1
  96. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +1 -1
  97. data/maps/{bis-gjr-Gujr-Latn-13194-1991.yaml → bis-guj-Gujr-Latn-13194-1991.yaml} +17 -2
  98. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +1 -1
  99. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +1 -1
  100. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +1 -1
  101. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +1 -1
  102. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +1 -1
  103. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +1 -1
  104. data/maps/by-bel-Cyrl-Latn-1998.yaml +5 -1
  105. data/maps/by-bel-Cyrl-Latn-2007.yaml +1 -1
  106. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  107. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  108. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  109. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  110. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  111. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  112. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  113. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  114. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  115. data/maps/dos-nep-Deva-Latn-1997.yaml +15 -1
  116. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +3 -3
  117. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +2 -2
  118. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -2
  119. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -2
  120. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -1
  121. data/maps/gki-bel-Cyrl-Latn-1992.yaml +1 -1
  122. data/maps/gki-bel-Cyrl-Latn-2000.yaml +1 -1
  123. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +7 -3
  124. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  125. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  126. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -1
  127. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -1
  128. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -1
  129. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  130. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  131. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -1
  132. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  133. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -1
  134. data/maps/iso-ara-Arab-Latn-233-1984.yaml +1 -1
  135. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  136. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  137. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +8 -4
  138. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -4
  139. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  140. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  141. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  142. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  143. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  144. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  145. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  146. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  147. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  148. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  149. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  150. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  151. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  152. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  153. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  154. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  155. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  156. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  157. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  158. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -3
  159. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  160. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  161. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  162. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +1 -1
  163. data/maps/kp-kor-Hang-Latn-2002.yaml +25 -17
  164. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +1 -1
  165. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  166. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  167. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  168. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  169. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  170. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +2 -2
  171. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +3 -3
  172. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +2 -2
  173. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +2 -2
  174. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  175. data/maps/odni-aze-Cyrl-Latn-2015.yaml +1 -1
  176. data/maps/odni-bel-Cyrl-Latn-2015.yaml +1 -1
  177. data/maps/odni-bul-Cyrl-Latn-2015.yaml +2 -2
  178. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  179. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  180. data/maps/odni-hin-Deva-Latn-2015.yaml +1 -1
  181. data/maps/odni-kat-Geor-Latn-2015.yaml +1 -1
  182. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +1 -1
  183. data/maps/odni-kir-Cyrl-Latn-2015.yaml +1 -1
  184. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  185. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +1 -1
  186. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  187. data/maps/odni-rus-Cyrl-Latn-2015.yaml +1 -1
  188. data/maps/odni-srp-Cyrl-Latn-2015.yaml +1 -1
  189. data/maps/odni-tat-Cyrl-Latn-2015.yaml +1 -1
  190. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +1 -1
  191. data/maps/odni-uig-Cyrl-Latn-2015.yaml +1 -1
  192. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +1 -1
  193. data/maps/odni-urd-Arab-Latn-2015.yaml +1 -1
  194. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +1 -1
  195. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +1 -1
  196. data/maps/royin-tha-Thai-Latn-1968.yaml +5 -1
  197. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +1 -1
  198. data/maps/royin-tha-Thai-Latn-1999.yaml +5 -1
  199. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  200. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  201. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  202. data/maps/ses-ara-Arab-Latn-1930.yaml +5 -1
  203. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  204. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  205. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  206. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  207. data/maps/{ungegn-amh-Ethi-Latn-2016.yaml → un-amh-Ethi-Latn-2016.yaml} +51 -24
  208. data/maps/un-ara-Arab-Latn-1971.yaml +1 -1
  209. data/maps/un-ara-Arab-Latn-1972.yaml +1 -1
  210. data/maps/un-ara-Arab-Latn-2017.yaml +1 -1
  211. data/maps/un-bel-Cyrl-Latn-2007.yaml +1 -1
  212. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  213. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +2 -2
  214. data/maps/un-ell-Grek-Latn-1987-tl.yaml +2 -2
  215. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -3
  216. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  217. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  218. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  219. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  220. data/maps/un-nep-Deva-Latn-1972.yaml +204 -17
  221. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  222. data/maps/un-rus-Cyrl-Latn-1987.yaml +1 -1
  223. data/maps/un-ukr-Cyrl-Latn-1998.yaml +35 -12
  224. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  225. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  226. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  227. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  228. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +1 -1
  229. data/maps/var-kor-Hang-Latn-mr-1939.yaml +2 -2
  230. data/maps/var-kor-Kore-Hang-2013.yaml +1 -1
  231. data/maps/var-kor-Kore-Latn-mr-1939.yaml +1 -1
  232. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  233. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  234. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  235. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  236. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  237. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  238. data/maps/var-tha-Thai-Thai-phonemic.yaml +1 -1
  239. data/maps/var-tha-Thai-Zsym-ipa.yaml +1 -1
  240. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +6 -2
  241. data/spec/interscript/filenames_spec.rb +384 -0
  242. data/spec/interscript_spec.rb +7 -4
  243. metadata +105 -26
  244. data/bin/interscript +0 -41
  245. data/bin/rspec +0 -29
  246. data/bin/setup +0 -8
  247. data/lib/__pycache__/g2pwrapper.cpython-38.pyc +0 -0
  248. data/lib/interscript-opal.rb +0 -2
  249. data/lib/interscript/opal_map_translate.rb +0 -12
  250. data/maps/alalc-hin-Deva-Latn-2020.yaml +0 -159
  251. data/maps/un-mon-Mong-Latn-2013.yaml +0 -99
@@ -0,0 +1,144 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: iso-639-2:pli
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Pali Romanization, 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/pali.pdf
9
+ creation_date: 2012
10
+ description: |
11
+ ALA-LC Romanization table for Pali
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ् ) called halanta or
25
+ virāma.
26
+
27
+ - |
28
+ Exception: Niggahīta combinations representing nasals are romanized by ṅ
29
+ before gutturals, ñ before palatals, ṇ before cerebrals, n before dentals, and m before
30
+ labials.
31
+
32
+ tests:
33
+ - source: "तेन खो पन समयेन वेसालिया अविदूरे कलन्दगामो नाम अत्थि"
34
+ expected: "taena khao pana samayaena vaesaālaiyaā avaidaūrae kalanadagaāmao naāma atathai"
35
+ - source: "तत्थ सुदिन्‍नो नाम कलन्दपुत्तो सेट्ठिपुत्तो होति"
36
+ expected: "tatatha saudainanao naāma kalanadapautatao saeṭaṭhaipautatao haotai"
37
+ - source: "अथ खो सुदिन्‍नो कलन्दपुत्तो सम्बहुलेहि"
38
+ expected: "atha khao saudainanao kalanadapautatao samabahaulaehai"
39
+ - source: "तथा चतुर्भिः पुरुषः परीक्ष्यते त्यागेन शीलेन गुणेन कर्मणा"
40
+ expected: "tathaā cataurabhaiḥ paurauṣaḥ paraīkaṣayatae tayaāgaena śaīlaena gauṇaena karamaṇaā"
41
+ - source: "अथ खो सुदिन्‍नो कलन्दपुत्तो अचिरवुट्ठिताय परिसाय येन भगवा तेनुपसङ्कमि; उपसङ्कमित्वा भगवन्तं अभिवादेत्वा एकमन्तं निसीदि"
42
+ expected: "atha khao saudainanao kalanadapautatao acairavauṭaṭhaitaāya paraisaāya yaena bhagavaā taenaupasaṅakamai; upasaṅakamaitavaā bhagavanataṃ abhaivaādaetavaā ekamanataṃ naisaīdai"
43
+ - source: "अथ खो सुदिन्‍नस्स कलन्दपुत्तस्स मातापितरो सुदिन्‍नं कलन्दपुत्तं एतदवोचुं"
44
+ expected: "atha khao saudainanasasa kalanadapautatasasa maātaāpaitarao saudainanaṃ kalanadapautataṃ etadavaocauṃ"
45
+ - source: "त्वं खोसि, तात सुदिन्‍न, अम्हाकं एकपुत्तको पियो मनापो सुखेधितो सुखपरिहतो"
46
+ expected: "tavaṃ khaosai, taāta saudainana, amahaākaṃ ekapautatakao paiyao manaāpao saukhaedhaitao saukhaparaihatao"
47
+ - source: "न त्वं, तात सुदिन्‍न, किञ्‍चि दुक्खस्स जानासि"
48
+ expected: "na tavaṃ, taāta saudainana, kaiñacai daukakhasasa jaānaāsai"
49
+ - source: "अनुञ्‍ञातोम्हि किर मातापितूहि अगारस्मा अनगारियं पब्बज्‍जाया’’ति, हट्ठो उदग्गो पाणिना गत्तानि परिपुञ्छन्तो वुट्ठासि"
50
+ expected: "anauñañaātaomahai kaira maātaāpaitaūhai agaārasamaā anagaāraiyaṃ pababajajaāyaā’’tai, haṭaṭhao udagagao paāṇainaā gatataānai paraipauñachanatao vauṭaṭhaāsai"
51
+
52
+ map:
53
+
54
+ rules:
55
+ # note[3]
56
+ - pattern: \u0902(?=[कखगघङ])
57
+ result: ṅ
58
+ - pattern: \u0902(?=[चछजझञ])
59
+ result: ñ
60
+ - pattern: \u0902(?=[टठडढण])
61
+ result: ṇ
62
+ - pattern: \u0902(?=[तथदधन])
63
+ result: n
64
+
65
+ characters:
66
+
67
+ # I. Vowels and Diphthongs (see Note 1)
68
+
69
+ 'अ': 'a'
70
+ 'आ': 'ā'
71
+ 'इ': 'i'
72
+ 'ई': 'ī'
73
+ 'उ': 'u'
74
+ 'ऊ': 'ū'
75
+ 'ए': 'e'
76
+ 'ओ': 'o'
77
+
78
+ # II. Consonants (see Note 2)
79
+ # Gutturals
80
+ 'क': 'ka'
81
+ 'ख': 'kha'
82
+ 'ग': 'ga'
83
+ 'घ': 'gha'
84
+ 'ङ': 'ṅa'
85
+
86
+ # Palatals
87
+ 'च': 'ca'
88
+ 'छ': 'cha'
89
+ 'ज': 'ja'
90
+ 'झ': 'jha'
91
+ 'ञ': 'ña'
92
+
93
+ # Cerebrals
94
+ 'ट': 'ṭa'
95
+ 'ठ': 'ṭha'
96
+ 'ड': 'ḍa'
97
+ 'ढ': 'ḍha'
98
+ 'ण': 'ṇa'
99
+
100
+ # Dentals
101
+ 'त': 'ta'
102
+ 'थ': 'tha'
103
+ 'द': 'da'
104
+ 'ध': 'dha'
105
+ 'न': 'na'
106
+
107
+ # Labials
108
+ 'प': 'pa'
109
+ 'फ': 'pha'
110
+ 'ब': 'ba'
111
+ 'भ': 'bha'
112
+ 'म': 'ma'
113
+
114
+ # Semivowels
115
+ 'य': 'ya'
116
+ 'र': 'ra'
117
+ 'ल': 'la'
118
+ 'ळ': 'ḻa'
119
+ 'व': 'va'
120
+
121
+ # Sibilants
122
+ 'श': 'śa'
123
+ 'ष': 'ṣa'
124
+ 'स': 'sa'
125
+
126
+ # Aspirate
127
+ 'ह': 'ha'
128
+
129
+ # Visagga
130
+ 'ः': 'ḥ'
131
+
132
+ # Niggahīta/Anusvāra
133
+ 'ं': 'ṃ'
134
+
135
+ # Medials # Needed for connecting consonants
136
+ 'ा': "ā"
137
+ 'ि': "i"
138
+ 'ी': "ī"
139
+ 'ु': "u"
140
+ 'ू': "ū"
141
+ 'े': "e"
142
+ 'ो': "o"
143
+ '्': ""
144
+ '‍': ''# Used for joining
@@ -0,0 +1,47 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: iso-639-2:pra
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Prakrit Romanization, 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/sanskrit.pdf
9
+ creation_date: 2012
10
+ description: |
11
+ ALA-LC Romanization table for Prakrit
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ् ) called halanta or
25
+ virāma.
26
+
27
+ - |
28
+ Exception: Anusvāra is transliterated by:
29
+
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+ In other circumstances it is transliterated by a tilde (~) over the vowel.
36
+
37
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
38
+
39
+ tests:
40
+ - source: "सृष्टिस्थितिविनाशानां शक्तिभूते सनातनि"
41
+ expected: "saṛṣaṭaisathaitaivainaāśaānaāṃ śakataibhaūtae sanaātanai"
42
+ - source: "गुणाश्रये गुणमये नारायणि नमोऽस्तु ते"
43
+ expected: "gauṇaāśarayae gauṇamayae naāraāyaṇai namao’satau tae"
44
+
45
+ map:
46
+
47
+ inherit: "alalc-san-Deva-Latn-2012"
@@ -1,10 +1,14 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 1997
4
- language: rus
4
+ language: iso-639-2:rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization System 1997
8
+ alias:
9
+ ogc11122:
10
+ code: rus_Cyrl2Latn_ALA_1997
11
+ description: Russian ALA-Library of Congress 1997 System
8
12
  url: https://en.wikipedia.org/wiki/ALA-LC_romanization_for_Russian
9
13
  creation_date: 1997
10
14
  description: |
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  authority_id: alalc
3
3
  id: 2012
4
- language: rus
4
+ language: iso-639-2:rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
7
  name: ALA-LC Romanization System 2012
@@ -0,0 +1,172 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: iso-639-2:san
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Sanskrit Romanization, 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/sanskrit.pdf
9
+ creation_date: 2012
10
+ description: |
11
+ ALA-LC Romanization table for Sanskrit
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ् ) called halanta or
25
+ virāma.
26
+
27
+ - |
28
+ Exception: Anusvāra is transliterated by:
29
+
30
+ a) ṅ before gutturals,
31
+ b) ñ before palatals,
32
+ c) ṇ before cerebrals,
33
+ d) n before dentals, and
34
+ e) m before labials.
35
+ In other circumstances it is transliterated by a tilde (~) over the vowel.
36
+
37
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
38
+
39
+ tests:
40
+ - source: "पूर्णमदः पूर्णमिदं पूर्णात् पूर्ण्मुदच्यते"
41
+ expected: "paūraṇamadaḥ paūraṇamaidaṃ paūraṇaāta paūraṇamaudacayatae"
42
+ - source: "पूर्णस्य पूर्णमादाय पूर्णमेवावशिष्यते"
43
+ expected: "paūraṇasaya paūraṇamaādaāya paūraṇamaevaāvaśaiṣayatae"
44
+ - source: "यथा चतुर्भिः कनकं परीक्ष्यते निर्घषणच्छेदन तापताडनैः"
45
+ expected: "yathaā cataurabhaiḥ kanakaṃ paraīkaṣayatae nairaghaṣaṇacachaedana taāpataāḍanaaiḥ"
46
+ - source: "तथा चतुर्भिः पुरुषः परीक्ष्यते त्यागेन शीलेन गुणेन कर्मणा"
47
+ expected: "tathaā cataurabhaiḥ paurauṣaḥ paraīkaṣayatae tayaāgaena śaīlaena gauṇaena karamaṇaā"
48
+ - source: "यो न हृष्यति न द्वेष्टि न शोचति न काङ्‍क्षति"
49
+ expected: "yao na haṛṣayatai na davaeṣaṭai na śaocatai na kaāṅakaṣatai"
50
+ - source: "शुभाशुभपरित्यागी भक्तिमान्यः स मे प्रियः"
51
+ expected: "śaubhaāśaubhaparaitayaāgaī bhakataimaānayaḥ sa mae paraiyaḥ"
52
+ - source: "सत्य -सत्यमेवेश्वरो लोके सत्ये धर्मः सदाश्रितः"
53
+ expected: "sataya -satayamaevaeśavarao laokae satayae dharamaḥ sadaāśaraitaḥ"
54
+ - source: "सत्यमूलनि सर्वाणि सत्यान्नास्ति परं पदम्"
55
+ expected: "satayamaūlanai saravaāṇai satayaānanaāsatai paraṃ padama"
56
+ - source: "पिता माताग्निरात्मा च गुरुश्च भरतर्षभ"
57
+ expected: "paitaā maātaāganairaātamaā ca gaurauśaca bharataraṣabha"
58
+
59
+ map:
60
+
61
+ rules:
62
+ # note[3]
63
+ - pattern: \u0902(?=[कखगघङ])
64
+ result: ṅ
65
+ - pattern: \u0902(?=[चछजझञ])
66
+ result: ñ
67
+ - pattern: \u0902(?=[टठडढण])
68
+ result: ṇ
69
+ - pattern: \u0902(?=[तथदधन])
70
+ result: n
71
+
72
+ characters:
73
+
74
+ # I. Vowels and Diphthongs (see Note 1)
75
+
76
+ 'अ': 'a'
77
+ 'आ': 'ā'
78
+ 'इ': 'i'
79
+ 'ई': 'ī'
80
+ 'उ': 'u'
81
+ 'ऊ': 'ū'
82
+ 'ऋ': 'ṛ'
83
+ 'ॠ': 'ṝ'
84
+ 'ऌ': 'ḷ'
85
+ 'ए': 'e'
86
+ 'ऐ': 'ai'
87
+ 'ओ': 'o'
88
+ 'औ': 'au'
89
+
90
+ # II. Consonants (see Note 2)
91
+ # Gutturals
92
+ 'क': 'ka'
93
+ 'ख': 'kha'
94
+ 'ग': 'ga'
95
+ 'घ': 'gha'
96
+ 'ङ': 'ṅa'
97
+
98
+ # Palatals
99
+ 'च': 'ca'
100
+ 'छ': 'cha'
101
+ 'ज': 'ja'
102
+ 'झ': 'jha'
103
+ 'ञ': 'ña'
104
+
105
+ # Cerebrals
106
+ 'ट': 'ṭa'
107
+ 'ठ': 'ṭha'
108
+ 'ड': 'ḍa'
109
+ 'ढ': 'ḍha'
110
+ 'ण': 'ṇa'
111
+
112
+ # Dentals
113
+ 'त': 'ta'
114
+ 'थ': 'tha'
115
+ 'द': 'da'
116
+ 'ध': 'dha'
117
+ 'न': 'na'
118
+
119
+ # Labials
120
+ 'प': 'pa'
121
+ 'फ': 'pha'
122
+ 'ब': 'ba'
123
+ 'भ': 'bha'
124
+ 'म': 'ma'
125
+
126
+ # Semivowels
127
+ 'य': 'ya'
128
+ 'र': 'ra'
129
+ 'ल': 'la'
130
+ 'ळ': 'ḻa'
131
+ 'व': 'va'
132
+
133
+ # Sibilants
134
+ 'श': 'śa'
135
+ 'ष': 'ṣa'
136
+ 'स': 'sa'
137
+
138
+ # Aspirate
139
+ 'ह': 'ha'
140
+
141
+ # Anusvāra
142
+ 'ं': 'ṃ'
143
+
144
+ # Visarga
145
+ 'ः': 'ḥ'
146
+
147
+ # Anunāsika
148
+ 'ँ': 'm̐'
149
+
150
+ # Avagraha
151
+ 'ऽ': '’' # (apostrophe)
152
+
153
+ #Jihvāmūlīya
154
+ '\u0CF1': 'ẖ'
155
+
156
+ #Upadhmānīya
157
+ '\u0CF2': 'ḫ'
158
+
159
+ # Medials # Needed for connecting consonants
160
+ 'ा': "ā"
161
+ 'ि': "i"
162
+ 'ी': "ī"
163
+ 'ु': "u"
164
+ 'ू': "ū"
165
+ 'ृ': "ṛ"
166
+ 'ॄ': "ṝ"
167
+ 'े': "e"
168
+ 'ै': "ai"
169
+ 'ो': "o"
170
+ 'ौ': "au"
171
+ '्': ""
172
+ '‍': ''# Used for joining
@@ -0,0 +1,292 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:sin
5
+ source_script: Sinh
6
+ destination_script: Latn
7
+ name: Sinhalese Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: sin_Sinh2Latn_ALA_1997
11
+ description: Sinhalese ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/sinhales.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Sinhalese
16
+
17
+
18
+ notes:
19
+
20
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
21
+ vowels following a consonant can be found in grammars; no distinction between the two is
22
+ made in transliteration.
23
+
24
+ - |
25
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
26
+ transliteration, with the following exceptions:
27
+
28
+ a) when another vowel is indicated by its appropriate sign; and
29
+ b) when the absence of any vowel is indicated by the sign ් called virāma.
30
+
31
+ - |
32
+ Exceptions: Anusvāra is transliterated by:
33
+
34
+ a) ṅ before gutturals,
35
+ b) ñ before palatals,
36
+ c) ṇ before cerebrals,
37
+ d) n before dentals, and
38
+ e) m before labials.
39
+
40
+ - |
41
+ Exceptions:
42
+ a) when saññaka represents a nasal, it is romanized according to the rule for anusvāra.
43
+ b) when saññaka is combined with an aspirated consonant, the combination is romanized as a non-aspirated, followed by an aspirated consonant.
44
+
45
+
46
+ tests:
47
+ - source: "ශී‍්‍ර ලංකාවේ කී‍්‍රඩාව ඉතිහාසයේ ඉහළම තැනකට ගේන්න කටයුතු කරනවා"
48
+ expected: "śīra laṃkāvē kīraḍāva itihāsayē ihaḷama tănakaṭa gēnanna kaṭayutu karanavā"
49
+ - source: "කොච්චිකඬේ මෝයකට අසල නෑමට ගිය තරුණයෝ ෩ක් මරුට - මිතුරාගේ උපන් දිනය සැමරීමට ඇවිත්"
50
+ expected: "kocañcikaṇḍē mōyakaṭa asala nâmaṭa giya taruṇayō 3k maruṭa - miturāgē upan dinaya sămarīmaṭa ăvit"
51
+ - source: "ලෝක ළමා දිනයදා සිසුන් පිරිසක් කසිප්පු බීලා"
52
+ expected: "lōka ḷamā dinayadā sisun pirisak kasippu bīlā"
53
+ - source: "කෝටි 16ක හෙරොයින් සමග දන්කොටුවේදී 7ක් දැලේ"
54
+ expected: "kōṭi 16ka heroyin samaga danaṅkoṭuvēdī 7k dălē"
55
+ - source: "මිනුවන්ගොඩ පීසීආර් දෙදහසක් සිදුකරයි"
56
+ expected: "minuvanaṅgoḍa pīsīār dedahasak sidukarayi"
57
+ - source: "පාස්කු ප‍්‍රහාරය වගේම පාස්කු ප්‍රෝඩාව ගැනත් සොයන්න කොමිසමක් පත්කළ යුතුයි - විපක්‍ෂ නායක සජිත් පේ‍්‍රමදාස"
58
+ expected: "pāsaṅku parahāraya vagēma pāsaṅku prōḍāva gănat soyananna komisamak pataṅkaḷa yutuyi - vipakṣa nāyaka sajit pēramadāsa"
59
+ - source: "ට‍්‍රම්ප්ගේ සෞඛ්‍යය තීරණාත්මකයි - ට්විටර් හරහා ජනතාව අමතයි"
60
+ expected: "ṭarampaṅgē saukhyaya tīraṇātmakayi - ṭviṭar harahā janatāva amatayi"
61
+ - source: "පාස්කු දා ප‍්‍රහාරය පිළිබඳ පරීක්‍ෂණවලින් කිසිවකුට අසාධාරණයක් වීමට ඉඩ දෙන්නේ නෑ - අගමැති"
62
+ expected: "pāsaṅku dā parahāraya piḷibanda parīkṣaṇavalin kisivakuṭa asādhāraṇayak vīmaṭa iḍa denannē nâ - agamăti"
63
+ - source: "දිල්ලි කැපිටල්ස් සහ කෝලිගේ බැංගලෝර් තෙවැනි ජය ලබයි"
64
+ expected: "dilli kăpiṭals saha kōligē băṃgalōr tevăni jaya labayi"
65
+ - source: "ශ‍්‍රී ලාංකික සම්භවයක් සහිත ප‍්‍රංශයේ පවුලක 5 ක් ඝාතනය කරලා"
66
+ expected: "śarī lāṃkika sambhavayak sahita paraṃśayē pavulaka 5 k ghātanaya karalā"
67
+ - source: "පැතිකුදය ඉක්මනින් සුව කරන ප‍්‍රතිකාර"
68
+ expected: "pătikudaya ikmanin suva karana paratikāra"
69
+
70
+ map:
71
+
72
+ rules:
73
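+ # note[3] -- NOTE(review): these patterns match U+0DCA, the virāma of note 2(b), not the anusvāra (U+0D82) that note 3 describes; confirm the intended code point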
74
+ - pattern: \u0DCA(?=[කඛගඝඞ])
75
+ result: ṅ
76
+ - pattern: \u0DCA(?=[චඡජඣඤ])
77
+ result: ñ
78
+ - pattern: \u0DCA(?=[ටඨඩඪණ])
79
+ result: ṇ
80
+ - pattern: \u0DCA(?=[තථදධන])
81
+ result: n
82
+
83
+ # note[2(a,b)]
84
+ - pattern: ([ක]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
85
+ result: 'k'
86
+ - pattern: ([ඛ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
87
+ result: 'kh'
88
+ - pattern: ([ග]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
89
+ result: 'g'
90
+ - pattern: ([ඝ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
91
+ result: 'gh'
92
+ - pattern: ([ඞ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
93
+ result: 'ṅ'
94
+ - pattern: ([ච]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
95
+ result: 'c'
96
+ - pattern: ([ඡ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
97
+ result: 'ch'
98
+ - pattern: ([ජ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
99
+ result: 'j'
100
+ - pattern: ([ඣ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
101
+ result: 'jh'
102
+ - pattern: ([ඤ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
103
+ result: 'ñ'
104
+ - pattern: ([ට]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
105
+ result: 'ṭ'
106
+ - pattern: ([ඨ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
107
+ result: 'ṭh'
108
+ - pattern: ([ඩ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
109
+ result: 'ḍ'
110
+ - pattern: ([ඪ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
111
+ result: 'ḍh'
112
+ - pattern: ([ණ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
113
+ result: 'ṇ'
114
+ - pattern: ([ත]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
115
+ result: 't'
116
+ - pattern: ([ථ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
117
+ result: 'th'
118
+ - pattern: ([ද]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
119
+ result: 'd'
120
+ - pattern: ([ධ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
121
+ result: 'dh'
122
+ - pattern: ([න]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
123
+ result: 'n'
124
+ - pattern: ([ප]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
125
+ result: 'p'
126
+ - pattern: ([ඵ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
127
+ result: 'ph'
128
+ - pattern: ([බ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
129
+ result: 'b'
130
+ - pattern: ([භ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
131
+ result: 'bh'
132
+ - pattern: ([ම]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
133
+ result: 'm'
134
+ - pattern: ([ය]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
135
+ result: 'y'
136
+ - pattern: ([ර]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
137
+ result: 'r'
138
+ - pattern: ([ල]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
139
+ result: 'l'
140
+ - pattern: ([ළ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
141
+ result: 'ḷ'
142
+ - pattern: ([ව]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
143
+ result: 'v'
144
+ - pattern: ([ශ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
145
+ result: 'ś'
146
+ - pattern: ([ෂ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
147
+ result: 'ṣ'
148
+ - pattern: ([ස]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
149
+ result: 's'
150
+ - pattern: ([හ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
151
+ result: 'h'
152
+ - pattern: ([ඟ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
153
+ result: 'ṅg'
154
+ - pattern: ([ඦ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
155
+ result: 'ñj'
156
+ - pattern: ([ඬ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
157
+ result: 'ṇḍ'
158
+ - pattern: ([ඳ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
159
+ result: 'nd'
160
+ - pattern: ([ඹ]=?)(?=[\u0dcf\u0dd0\u0dd1\u0dd2\u0dd3\u0dd4\u0dd6\u0dd8\u0df2\u0ddf\u0df3\u0dd9\u0dda\u0ddb\u0ddc\u0ddd\u0dde\u0dca])
161
+ result: 'ṃb'
162
+
163
+
164
+ characters:
165
+
166
+ #Vowels and Diphthongs (see Note 1)
167
+
168
+ 'අ': 'a'
169
+ 'ආ': 'ā'
170
+ 'ඇ': 'ă'
171
+ 'ඈ': 'â'
172
+ 'ඉ': 'i'
173
+ 'ඊ': 'ī'
174
+ 'උ': 'u'
175
+ 'ඌ': 'ū'
176
+ 'ඍ': 'ṛ'
177
+ 'ඎ': 'ṝ'
178
+ 'ඏ': 'ḷ'
179
+ 'ඐ': 'ḹ'
180
+ 'එ': 'e'
181
+ 'ඒ': 'ē'
182
+ 'ඓ': 'ai'
183
+ 'ඔ': 'o'
184
+ 'ඕ': 'ō'
185
+ 'ඖ': 'au'
186
+
187
+ 'ා': 'ā'
188
+ 'ැ': 'ă'
189
+ 'ෑ': 'â'
190
+ 'ි': 'i'
191
+ 'ී': 'ī'
192
+ 'ු': 'u'
193
+ 'ූ': 'ū'
194
+ 'ෘ': 'ṛ'
195
+ 'ෲ': 'ṝ'
196
+ 'ෟ': 'ḷ'
197
+ 'ෳ': 'ḹ'
198
+ 'ෙ': 'e'
199
+ 'ේ': 'ē'
200
+ 'ෛ': 'ai'
201
+ 'ො': 'o'
202
+ 'ෝ': 'ō'
203
+ 'ෞ': 'au'
204
+
205
+
206
+ # II. Consonants (see Note 2)
207
+ # Gutturals
208
+ 'ක': 'ka'
209
+ 'ඛ': 'kha'
210
+ 'ග': 'ga'
211
+ 'ඝ': 'gha'
212
+ 'ඞ': 'ṅa'
213
+
214
+
215
+ # Palatals
216
+ 'ච': 'ca'
217
+ 'ඡ': 'cha'
218
+ 'ජ': 'ja'
219
+ 'ඣ': 'jha'
220
+ 'ඤ': 'ña'
221
+
222
+
223
+ # Cerebrals
224
+ 'ට': 'ṭa'
225
+ 'ඨ': 'ṭha'
226
+ 'ඩ': 'ḍa'
227
+ 'ඪ': 'ḍha'
228
+ 'ණ': 'ṇa'
229
+
230
+
231
+ # Dentals
232
+ 'ත': 'ta'
233
+ 'ථ': 'tha'
234
+ 'ද': 'da'
235
+ 'ධ': 'dha'
236
+ 'න': 'na'
237
+
238
+
239
+ # Labials
240
+ 'ප': 'pa'
241
+ 'ඵ': 'pha'
242
+ 'බ': 'ba'
243
+ 'භ': 'bha'
244
+ 'ම': 'ma'
245
+
246
+
247
+ # Semivowels
248
+ 'ය': 'ya'
249
+ 'ර': 'ra'
250
+ 'ල': 'la'
251
+ 'ළ': 'ḷa'
252
+ 'ව': 'va'
253
+
254
+ # Sibilants
255
+ 'ශ': 'śa'
256
+ 'ෂ': 'ṣa'
257
+ 'ස': 'sa'
258
+
259
+ # Aspirate
260
+ 'හ': 'ha'
261
+
262
+
263
+ # Visarga
264
+ 'ඃ': 'ḥ'
265
+
266
+ # Anusvāra (see Note 3)
267
+ 'ං': 'ṃ'
268
+
269
+
270
+ # numbers
271
+ '\u0DE6': '0'
272
+ '\u0DE7': '1'
273
+ '\u0DE8': '2'
274
+ '\u0DE9': '3'
275
+ '\u0DEA': '4'
276
+ '\u0DEB': '5'
277
+ '\u0DEC': '6'
278
+ '\u0DED': '7'
279
+ '\u0DEE': '8'
280
+ '\u0DEF': '9'
281
+
282
+
283
+ "‍": ''# Used for joining
284
+ "‌": ''# Used for non joining
285
+ '්': ''
286
+
287
+ # note[4] (saññaka combined with a consonant)
288
+ 'ඟ': 'ṅga'
289
+ 'ඦ': 'ñja'
290
+ 'ඬ': 'ṇḍa'
291
+ 'ඳ': 'nda'
292
+ 'ඹ': 'ṃba'