interscript 0.1.2 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,366 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 233-3
4
+ language: iso-233-3:prs
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Persian language — Simplified transliteration
8
+ url: https://web.archive.org/web/20200920064754/http://www.freeprotocols.org/content/republished/doc.public/standards/communication/iso/iso-233/iso-233-3.pdf
9
+ creation_date: 1999
10
+ confirmation_date: 1999-01-15
11
+ description: |
12
+ This part of ISO 233 is one of a series of International
13
+ Standards, dealing with the conversion of systems of
14
+ writing. The aim of this part of ISO 233 and others in the
15
+ series is to provide a means for international
16
+ communication of written messages in a form which permits
17
+ the automatic transmission and reconstitution of these, by
18
+ men or machines. The system of conversion, in this case,
19
+ must be univocal and entirely reversible. This means that
20
+ no consideration should be given to phonetic and aesthetic
21
+ matters or to certain national customs: all these
22
+ considerations are, indeed, ignored by the machine
23
+ performing the function. The adoption of this part of ISO
24
+ 233 for international communication leaves every country
25
+ free to adopt for its own use a national standard which may
26
+ be different, on condition that it is compatible with this
27
+ part of ISO 233. The system proposed herein should make
28
+ this possible and be acceptable to international use if the
29
+ graphisms it creates are such that they may be converted
30
+ automatically into the graphisms used in any strict
31
+ national systems. This part of ISO 233 may be used by
32
+ anyone who has a clear understanding of the system and is
33
+ certain that it can be applied without ambiguity. The
34
+ result obtained will not give a correct pronunciation of
35
+ the original text in a person’s own language, but it will
36
+ serve as a means of finding automatically the original
37
+ graphism and thus allow anyone who has knowledge of the
38
+ original language to pronounce it correctly. Similarly, one
39
+ can only pronounce correctly a text written in, for
40
+ example, English or Polish, if one has a knowledge of
41
+ English or Polish. The adoption of national standards
42
+ compatible with this part of ISO 233 will permit the
43
+ representation, in an international publication, of the
44
+ morphemes of each language according to the customs of the
45
+ country where it is spoken. It will be possible to simplify
46
+ this representation in order to take into account the
47
+ number of the character sets available on different kinds
48
+ of machines.
49
+ 1-Scope:
50
+ This part of ISO 233 establishes a simplified
51
+ system for the transliteration of Persian characters into
52
+ Latin characters. This simplification of the stringent
53
+ rules established by ISO 233:1984 is especially intended to
54
+ facilitate the processing of bibliographic information (
55
+ e.g. catalogues, indices, citations, etc.)
56
+ 2-Normative references:
57
+ The following normative documents contain
58
+ provisions which, through reference in this text, constitute
59
+ provisions of this part of ISO 233. For dated references,
60
+ subsequent amendments to, or revisions of, any of these
61
+ publications do not apply. However, parties to agreements
62
+ based on this part of ISO 233 are encouraged to investigate
63
+ the possibility of applying the most recent editions of the
64
+ normative documents indicated below. For undated
65
+ references, the latest edition of the normative document
66
+ referred to applies. Members of ISO and IEC maintain
67
+ registers of currently valid International Standards. ISO 233-
68
+ 2, Information and documentation — Transliteration of
69
+ Arabic characters into Latin characters — Part 2: Arabic
70
+ language — Simplified transliteration. ISO/IEC 10646-1,
71
+ Information Technology — Universal Multiple-Octet Coded
72
+ Character Set (UCS) — Part 1: Architecture and Basic
73
+ Multilingual Plane.
74
+
75
+ notes: |
76
+ TODO
77
+
78
+ tests:
79
+ - source: آذَر
80
+ expected: âẕar
81
+
82
+ - source: سَم
83
+ expected: sam
84
+
85
+ - source: پُر
86
+ expected: por
87
+
88
+ - source: پِدَر
89
+ expected: pedar
90
+
91
+ - source: مَثَلاً
92
+ expected: mas̱alâ´´
93
+
94
+ - source: جزء
95
+ expected: jz’
96
+
97
+ - source: رأس
98
+ expected: râ’s
99
+
100
+ - source: سؤال
101
+ expected: sv’âl
102
+
103
+ - source: مسئلة
104
+ expected: msy’lh
105
+
106
+
107
+ map:
108
+ characters:
109
+
110
+ # word-medial or word-final form where so appearing in a word.
111
+ # '\u0627': '-'
112
+
113
+ # # Vowel, Diphthong and Diacritical Characters
114
+
115
+ # '\u064E': 'a'
116
+
117
+ # # Both e and i are available to romanize this short vowel,
118
+ # # depending on local usage and/or root language. In cases where the sound
119
+ # # is uncertain, i is the default romanization in BGN/PCGN standardization
120
+ # # procedures.
121
+ # '\u0650':
122
+ # - 'e'
123
+ # - 'i'
124
+
125
+ # # Both o and u are available to romanize this short vowel,
126
+ # # depending on local usage and/or root language. In cases where the sound
127
+ # # is uncertain, u is the default romanization in BGN/PCGN standardization
128
+ # # procedures.
129
+ # '\u064F':
130
+ # - 'o'
131
+ # - 'u'
132
+ # '\u0659': 'ê'
133
+
134
+ # # An alif with mad ( آ ) is written only in the initial position by
135
+ # # BGN/PCGN standardization procedures, in keeping with Persian language
136
+ # # family standards of use of the Arabic alphabet. The same letter written
137
+ # # in a medial or final position is written . . .
138
+ # '\u0622': 'ā'
139
+
140
+ # pending issue #442
141
+ # '\u0648': 'ō'
142
+ # '\u0648': 'ū'
143
+ # '\u0648': 'ow'
144
+ # '\u06CC': 'ī'
145
+
146
+ # # Or 'ē'. The character ی should be romanized ay or ē according to
147
+ # # its root language or local pronunciation. In case of uncertainty a
148
+ # # reference source (such as the Fairchild Aerial Surveys map series, or a
149
+ # # BGN/PCGN approved policy document/list of recommended spellings) should
150
+ # # be consulted.
151
+ # '\u06CC': 'ay'
152
+ # '\u06D0': 'ē'
153
+
154
+ # # Or 'aī'. Both the combination ay and aī are available to romanize
155
+ # # this character according to its root language or local pronunciation.
156
+ # # In cases where the sound is uncertain ay is the default romanization in
157
+ # # BGN/PCGN standardization procedures
158
+ # '\u06CC':
159
+ # - 'ay'
160
+ # - 'á'
161
+ # '\u06CD': 'êy'
162
+ # '\u0621': '’'
163
+ # '\u0674':
164
+ # - '-e'
165
+ # - '-ye'
166
+
167
+ # # Other Diacritical Marks and Language Conventions
168
+
169
+ # '\u0627': 'āy'
170
+
171
+ # '\u0648': 'w'
172
+ # '\u0626': '’'
173
+ # '\u06C0': ''
174
+ # '\u0651': ''
175
+
176
+
177
+ # special rules
178
+
179
+ '\s(?=\u0622\u0628\u064E\u0627\u062F)': '' # space followed by abad is removed
180
+ '\ufdf2': 'Allāh' # See note 5
181
+
182
+ # pointing
183
+ '\u064e' : 'a' # َ fatha
184
+
185
+ '\u0650':
186
+ - 'e'
187
+ - 'i'
188
+ '\u0650\b' : '-e' # ِ kasra
189
+
190
+ '\u064f': # ُ damma
191
+ - 'o'
192
+ - 'u'
193
+
194
+ '\u0652' : '' # ْ sokoon
195
+ '\u0659': 'ê'
196
+
197
+ # special pointed letters
198
+ '\u0639\u064e' : '‘a' # عَ
199
+ '\u0639\u0650' : '‘i' # عِ
200
+ '\u0639\u064f' : '‘ū' # عُ
201
+ # handle MacOS regex difference
202
+ '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
203
+
204
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
205
+ '\u0650\u06cc' : 'ī' # ـِي kasra followed by ي
206
+ '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
207
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي
208
+ '\u064f\u0648' : 'ō' # ـُو damma followed by و
209
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
210
+ '\u064e\u0649' : 'ay' # ـَى fatha followed by ى which is ا not ي
211
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
212
+ '\u064e\u0648' : 'ow' # ـَو
213
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
214
+ '\u0650\u06cc\u0651\u064e' : 'īy' # ـِيَّ
215
+ '\u064e\u064a' : 'aī' # ـَي
216
+ '\u064e\u06cc' : 'aī' # ـَي
217
+ '\u0649\u0670': 'á' # ىٰ
218
+ '\u0674': '-e' # ٴ
219
+ '\u0654': '-e' # ٔ
220
+ # - '-ye'
221
+
222
+ '\u0622' : 'â' # آ
223
+
224
+ # ta' marboota
225
+ '\u0629' : 't' # ة in the middle of the sentence
226
+ '\u0629$' : 'h'
227
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'h'
228
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'h'
229
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'h'
230
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'h'
231
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'h'
232
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'h'
233
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'h'
234
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'h'
235
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'h'
236
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'h'
237
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'h'
238
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'h'
239
+
240
+ # shadda
241
+ '\u0628' : 'bb' # ب
242
+ '\u067E' : 'pp' # پ
243
+ '\u062a' : 'tt' # ت
244
+ '\u062B' : 's̱s̱' # ث
245
+ '\u062c' : 'jj' # ج
246
+ '\u0686' : 'č̱č̱' # ‫چ‬‬
247
+ '\u062d' : 'ḥḥ' # ح
248
+ '\u062e' : 'ḵḵ' # خ
249
+ '\u062f' : 'dd' # د
250
+ '\u0689' : 'ḏḏ' # ‫ډ‬
251
+ '\u0630' : 'ẕẕ' # ذ
252
+ '\u0631' : 'rr' # ر
253
+ '\u0632' : 'zz' # ز
254
+ '\u0698' : 'zz' # ‫ژ‬
255
+ '\u0633' : 'ss' # س
256
+ '\u0634' : 'šš' # ش
257
+ '\u0635' : 'ṣṣ' # ص
258
+ '\u0636' : 'żż' # ض
259
+ '\u0637' : 'ṭṭ' # ط
260
+ '\u0638' : 'zz' # ظ
261
+ '\u0639' : '‘' # ع
262
+ '\u063a' : 'gh' # غ
263
+ '\u0641' : 'ff' # ف
264
+ '\u0642' : 'qq' # ق
265
+ '\u06A9' : 'kk' # ک
266
+ '\u06AF' : 'gg' # ‫گ‬
267
+ '\u0644' : 'll' # ل
268
+ '\u0645' : 'mm' # م
269
+ '\u0646' : 'nn' # ن
270
+ '\u0648' : 'vv' # و
271
+ '\u0647' : 'hh' # ه
272
+ '\u064a' : 'yy' # ي
273
+ '\u0649' : 'yy' # ي
274
+ '\u06D0' : 'ēē' # ې
275
+ '\u06CD' : 'êy' # ‫ۍ
276
+
277
+ # Tanvin
278
+ '\u064b': '´´' # ً
279
+ '\u064c': '' # ٌ
280
+ '\u064d': '' # ٍ
281
+
282
+ # hamzeh
283
+ '\u0621' : '’' # ء
284
+ '\u0623' : 'â’' # أ
285
+ '\u0624' : 'v’' # ؤ
286
+ '\u0626' : 'y’' # ئ
287
+
288
+ # punctuation
289
+
290
+ '\u060c' : ',' # vavak comma
291
+ '\u061b' : ';' # nogteh vavak semicolon
292
+ '\u061f' : '?' # neshane-ye porsesh question mark
293
+
294
+ '\u0625' : '' # إ
295
+ '\u0627' : 'â' # ا
296
+
297
+ # See note B
298
+ '\b\u0627\u0644' : 'al ' # ال
299
+ # '\uFE8E' : '' # ﺎ
300
+
301
+ # Sun letters
302
+ '\b\u0627\u0644\u062a' : 'at t' # الت
303
+ '\b\u0627\u0644\u062b' : 'as̄ s̄' # الث
304
+ '\b\u0627\u0644\u062f' : 'ad d' # الد
305
+ '\b\u0627\u0644\u0630' : 'az̄ z̄' # الذ
306
+ '\b\u0627\u0644\u0631' : 'ar r' # الر
307
+ '\b\u0627\u0644\u0632' : 'az z' # الز
308
+ '\b\u0627\u0644\u0633' : 'as s' # الس
309
+ '\b\u0627\u0644\u0634' : 'ash sh' # الش
310
+ '\b\u0627\u0644\u0635' : 'aş ş' # الص
311
+ '\b\u0627\u0644\u0636' : 'aẕ ẕ' # الض
312
+ '\b\u0627\u0644\u0637' : 'aţ ţ' # الط
313
+ '\b\u0627\u0644\u0638' : 'az̧ z̧' # الظ
314
+ '\b\u0627\u0644\u0644' : 'al l' # الل
315
+ '\b\u0627\u0644\u0646' : 'an n' # الن
316
+
317
+ # consonant characters
318
+
319
+ '\u0628' : 'b' # ب
320
+ '\u067E': 'p' # پ
321
+ '\u062a' : 't' # ت
322
+ # '\u067C': 'ṯ' # ټ
323
+ '\u062B': 's̱' # ث
324
+ '\u062c' : 'j' # ج
325
+ '\u0686': 'c' # ‫چ‬
326
+
327
+ # # The variant form ج is seen infrequently and does not have a
328
+ # # single Unicode encoding.
329
+ # '\u0681': 'dz' # Note 2 # ‫ځ‬
330
+
331
+ # '\u0685': 'ts' # Note 2 # ‫څ
332
+
333
+ '\u062d' : 'ḥ' # ح
334
+ '\u062e' : 'ḵ' # خ
335
+ '\u062f' : 'd' # د
336
+ '\u0689' : 'ḏ' # ‫ډ‬
337
+ '\u0630' : 'ẕ' # ذ
338
+ '\u0631' : 'r' # ر
339
+ # '\u0693' : 'ṟ' # ړ
340
+ '\u0632' : 'z' # ز
341
+ '\u0698' : 'z' # ‫ژ‬
342
+ # '\u0696' : 'z͟h' # ږ
343
+ '\u0633' : 's' # س
344
+ # '\u069A' : 's͟h' # ښ
345
+ '\u0634' : 'š' # ش
346
+ '\u0635' : 'ṣ' # ص
347
+ '\u0636' : 'ż' # ض
348
+ '\u0637' : 'ṭ' # ط
349
+ '\u0638' : 'z' # ظ
350
+ '\u0639' : '‘' # ع
351
+ '\u063a' : 'gh' # غ
352
+ '\u0641' : 'f' # ف
353
+ '\u0642' : 'q' # ق
354
+ # '\u0643' : 'k' # ك
355
+ '\u06A9' : 'k' # ک
356
+ '\u06AF' : 'g' # ‫گ‬
357
+ '\u0644' : 'l' # ل
358
+ '\u0645' : 'm' # م
359
+ '\u0646' : 'n' # ن
360
+ # '\u06BC' : 'ṉ' # ڼ
361
+ '\u0648' : 'v' # و
362
+ '\u0647' : 'h' # ه
363
+ '\u064a' : 'y' # ي
364
+ '\u0649' : 'y' # ي
365
+ '\u06D0' : 'ē' # ې
366
+ '\u06CD' : 'êy' # ‫ۍ‬
@@ -1,10 +1,10 @@
1
1
  ---
2
2
  authority_id: iso
3
- id: iso9
4
- language: rus
3
+ id: 9-1995
4
+ language: iso-639-2:rus
5
5
  source_script: Cyrl
6
6
  destination_script: Latn
7
- name: ISO 9
7
+ name: "ISO 9:1995 Information and documentation — Transliteration of Cyrillic characters into Latin characters — Slavic and non-Slavic languages"
8
8
  url: https://www.iso.org/standard/3589.html
9
9
  creation_date: 1995
10
10
  description: |
@@ -13,9 +13,8 @@ description: |
13
13
  languages. Table 3 includes in a single sequence, listed in the
14
14
  Cyrillic alphabetic order, the 118 single or diacritic-carrying
15
15
  characters that appear in one or another of the considered alphabets.
16
+
16
17
  tests:
17
- - source:
18
- expected:
19
18
 
20
19
  map:
21
20
  characters:
@@ -270,4 +269,3 @@ map:
270
269
  "\u04c0": "\u2021" # Ӏ => ‡
271
270
  "\u02bc": "\u0060" # ʼ => `
272
271
  "\u02ee": "\u00a8" # ˮ => ¨
273
-
@@ -0,0 +1,220 @@
1
+ ---
2
+ authority_id: iso
3
+ id: 15919-2001
4
+ language: iso-639-2:san
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: "Information and documentation — Transliteration of Devanagari and related Indic scripts into Latin characters"
8
+ url: https://www.chatranjali.fr/Scripts/Standards/ISO15919.pdf
9
+ creation_date: 2001
10
+ adoption_date: 2001
11
+ description: |
12
+ Script conversion is often required for documents such as historical and literary texts, geographical texts (including
13
+ maps and atlases), bibliographies, catalogues, lists and passports (and other identification documents).
14
+
15
+ Text in Devanagari script or other Indic scripts sometimes needs to be shown in Latin script, where users, or
16
+ equipment that they are using, cannot read or write the text.
17
+
18
+ This International Standard applies to transliteration of Devanagari, and to Indic scripts related to Devanagari,
19
+ independent of the period in which it is or was used.
20
+
21
+ notes:
22
+
23
+ - All transliterations made using this International Standard shall be case-insensitive.
24
+ - Inherent a with a consonant shall always be transliterated.
25
+ - anusvara (including Vedic anusvara) shall be transliterated as ṁ
26
+ - candrabindu shall be transliterated as m̐
27
+ - When m̐, ṃ or ṁ are associated with a vowel, they shall be placed after the vowel. When m̐ is associated
28
+ with a semivowel, it shall be placed before the semivowel.
29
+ - Latin punctuation signs and Hindu-Arabic numerals shall remain unchanged in transliteration.
30
+ Indic punctuation is outside the scope of this International Standard.
31
+ - The Vedic accent Udatta shall be transliterated as an acute accent over the transliterated vowel, and the
32
+ independent Svarita as a grave accent over the transliterated vowel. In the case of the digraphs ai, au, the accent
33
+ shall be attached to the second vowel.
34
+ - |
35
+ A colon: before a Latin character shall be used to resolve ambiguity. Some normative cases are as
36
+ follows.
37
+ - :’ for avagraha in modern text. (The apostrophe in modern text remains unchanged in accordance with the previous rule.)
38
+ - Vowel hiatus, not digraph transliteration of diphthongs; as in Sanskrit pra:uga (not prauga), “yoke”;
39
+ - If a character in an Indic script is defined in such a way as to be equivalent to another character in any
40
+ script, where the second character has a transliteration in this International Standard, then the first character shall
41
+ be transliterated in the same way as the second character.
42
+ - Where it is desired to show the Vedic accent Anudatta, it should be transliterated as an underscore. In the case of
43
+ the digraphs ai, au, both Latin vowels should be underscored.
44
+ Where word boundaries are not shown in the original text (as happens commonly in Sanskrit) and a word ends in a
45
+ consonant, the transliteration should show word division by a space; but when phonological processes result in two
46
+ words sharing a common vowel, no attempt should be made to separate them. This will require a good knowledge
47
+ of the language in question.
48
+
49
+
50
+ tests:
51
+ - source: "पूर्णमदः पूर्णमिदं पूर्णात् पूर्ण्मुदच्यते"
52
+ expected: "paūraṇamadaḥ paūraṇamaidaṁ paūraṇaāta paūraṇamaudacayatae"
53
+ - source: "पूर्णस्य पूर्णमादाय पूर्णमेवावशिष्यते"
54
+ expected: "paūraṇasaya paūraṇamaādaāya paūraṇamaevaāvaśaiṣayatae"
55
+ - source: "यथा चतुर्भिः कनकं परीक्ष्यते निर्घषणच्छेदन तापताडनैः"
56
+ expected: "yathaā cataurabhaiḥ kanakaṁ paraīkaṣayatae nairaghaṣaṇacachaedana taāpataāḍanaaiḥ"
57
+ - source: "तथा चतुर्भिः पुरुषः परीक्ष्यते त्यागेन शीलेन गुणेन कर्मणा"
58
+ expected: "tathaā cataurabhaiḥ paurauṣaḥ paraīkaṣayatae tayaāgaena śaīlaena gauṇaena karamaṇaā"
59
+ - source: "यो न हृष्यति न द्वेष्टि न शोचति न काङ्‍क्षति"
60
+ expected: "yao na haṛṣayatai na davaeṣaṭai na śaocatai na kaāṅakaṣatai"
61
+ - source: "शुभाशुभपरित्यागी भक्तिमान्यः स मे प्रियः"
62
+ expected: "śaubhaāśaubhaparaitayaāgaī bhakataimaānayaḥ sa mae paraiyaḥ"
63
+ - source: "सत्य -सत्यमेवेश्वरो लोके सत्ये धर्मः सदाश्रितः"
64
+ expected: "sataya -satayamaevaeśavarao laokae satayae dharamaḥ sadaāśaraitaḥ"
65
+ - source: "सत्यमूलनि सर्वाणि सत्यान्नास्ति परं पदम्"
66
+ expected: "satayamaūlanai saravaāṇai satayaānanaāsatai paraṁ padama"
67
+ - source: "पिता माताग्निरात्मा च गुरुश्च भरतर्षभ"
68
+ expected: "paitaā maātaāganairaātamaā ca gaurauśaca bharataraṣabha"
69
+ - source: "पल्यालँ"
70
+ expected: "palayaām̐la"
71
+ - source: "दुसूलँ"
72
+ expected: "dausaūm̐la"
73
+
74
+ map:
75
+
76
+ characters:
77
+
78
+ # I. Vowels and Diphthongs (see Note 1)
79
+
80
+ 'अ': 'a'
81
+ 'आ': 'ā'
82
+ 'इ': 'i'
83
+ 'ई': 'ī'
84
+ 'उ': 'u'
85
+ 'ऊ': 'ū'
86
+ 'ऋ': 'ṛ'
87
+ 'ॠ': 'ṝ'
88
+ 'ऌ': 'ḷ'
89
+ 'ॡ': 'ḹ'
90
+ 'ए': 'e'
91
+ 'ऐ': 'ai'
92
+ 'ओ': 'o'
93
+ 'औ': 'au'
94
+ 'ऍ': 'ê'
95
+ 'ऑ': 'ô'
96
+
97
+ # II. Consonants (see Note 2)
98
+ # Gutturals
99
+ 'क': 'ka'
100
+ 'ख': 'kha'
101
+ 'ग': 'ga'
102
+ 'घ': 'gha'
103
+ 'ङ': 'ṅa'
104
+
105
+ # Palatals
106
+ 'च': 'ca'
107
+ 'छ': 'cha'
108
+ 'ज': 'ja'
109
+ 'झ': 'jha'
110
+ 'ञ': 'ña'
111
+
112
+ # Cerebrals
113
+ 'ट': 'ṭa'
114
+ 'ठ': 'ṭha'
115
+ 'ड': 'ḍa'
116
+ 'ढ': 'ḍha'
117
+ 'ण': 'ṇa'
118
+
119
+ # Dentals
120
+ 'त': 'ta'
121
+ 'थ': 'tha'
122
+ 'द': 'da'
123
+ 'ध': 'dha'
124
+ 'न': 'na'
125
+
126
+ # Labials
127
+ 'प': 'pa'
128
+ 'फ': 'pha'
129
+ 'ब': 'ba'
130
+ 'भ': 'bha'
131
+ 'म': 'ma'
132
+
133
+ # Semivowels
134
+ 'य': 'ya'
135
+ 'र': 'ra'
136
+ 'ल': 'la'
137
+ 'ळ': 'ḷa'
138
+ 'व': 'va'
139
+
140
+ # Sibilants
141
+ 'श': 'śa'
142
+ 'ष': 'ṣa'
143
+ 'स': 'sa'
144
+
145
+ # Aspirate
146
+ 'ह': 'ha'
147
+
148
+ 'ᳵ': 'ẖ'
149
+ 'ᳶ': 'ḫ'
150
+
151
+
152
+ # Anusvāra
153
+ 'ं': 'ṁ'
154
+
155
+ # Bisarga
156
+ 'ः': 'ḥ'
157
+
158
+ # candrabindu
159
+ 'ँ': 'm̐'
160
+
161
+ # Avagraha
162
+ 'ऽ': ':’' # (apostrophe)
163
+
164
+
165
+ # Medials # Needed for connecting consonants
166
+ 'ा': "ā"
167
+ 'ि': "i"
168
+ 'ी': "ī"
169
+ 'ु': "u"
170
+ 'ू': "ū"
171
+ 'ॢ': "ḷ"
172
+ 'ॣ': "ḹ"
173
+ 'ृ': "ṛ"
174
+ 'ॄ': "ṝ"
175
+ 'े': "e"
176
+ 'ै': "ai"
177
+ 'ो': "o"
178
+ 'ौ': "au"
179
+ 'ॉ': 'ô'
180
+ 'ॅ': "ê"
181
+
182
+ '्': ""
183
+ '‍': ''# Used for joining
184
+
185
+
186
+ # for semivowel rule no. 5
187
+
188
+ 'यँ': 'm̐ya'
189
+ 'रँ': 'm̐ra'
190
+ 'लँ': 'm̐la'
191
+ 'ळँ': 'm̐ḷa'
192
+ 'वँ': 'm̐va'
193
+
194
+
195
+ # digits
196
+
197
+ '०': '0'
198
+ '१': '1'
199
+ '२': '2'
200
+ '३': '3'
201
+ '४': '4'
202
+ '५': '5'
203
+ '६': '6'
204
+ '७': '7'
205
+ '८': '8'
206
+ '९': '9'
207
+
208
+ # Perso-Arabic characters
209
+
210
+ 'स़': 's̱a'
211
+ 'ह़': 'ẓa'
212
+ 'ख़': 'k͟ha'
213
+ 'ज़': 'za'
214
+ 'त़': 't̤a'
215
+ 'ग़': 'ġa'
216
+ 'फ़': 'fa'
217
+ 'क़': 'qa'
218
+ 'व़': 'wa'
219
+ 'ड़': 'ṛa'
220
+ 'ढ़': 'ṛha'