interscript-maps 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +28 -0
  3. data/interscript-maps.gemspec +28 -0
  4. data/interscript-maps.yaml +235 -0
  5. data/libs/posix.iml +11 -0
  6. data/libs/unicode.iml +13 -0
  7. data/libs/var-Cyrl.iml +7 -0
  8. data/libs/var-kor.iml +17 -0
  9. data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
  10. data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
  11. data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
  12. data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
  13. data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
  14. data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
  21. data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
  22. data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
  23. data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
  24. data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
  27. data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
  28. data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
  29. data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
  30. data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
  31. data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
  32. data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
  33. data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
  34. data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
  35. data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
  36. data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
  37. data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
  38. data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
  39. data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
  40. data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
  41. data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
  42. data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
  43. data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
  44. data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
  45. data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
  46. data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
  47. data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
  48. data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
  49. data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
  50. data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
  51. data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
  52. data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
  53. data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
  54. data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
  55. data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
  56. data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
  57. data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
  58. data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
  59. data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
  60. data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
  61. data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
  62. data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
  63. data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
  64. data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
  65. data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
  66. data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
  67. data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
  68. data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
  69. data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
  70. data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
  71. data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
  72. data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
  73. data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
  74. data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
  75. data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
  76. data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
  77. data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
  78. data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
  79. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
  80. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
  81. data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
  82. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
  83. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
  84. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
  85. data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
  86. data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
  87. data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
  88. data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
  89. data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
  90. data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
  91. data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
  92. data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
  93. data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
  94. data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
  95. data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
  96. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
  97. data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
  98. data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
  99. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
  100. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
  101. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
  102. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
  103. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
  104. data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
  105. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
  106. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
  107. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
  108. data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
  109. data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
  110. data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
  111. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
  112. data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
  113. data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
  114. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
  115. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
  116. data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
  117. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
  118. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
  119. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
  120. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
  122. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
  123. data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
  124. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
  125. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
  126. data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
  127. data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
  128. data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
  129. data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
  130. data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
  131. data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
  132. data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
  133. data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
  134. data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
  135. data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
  136. data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
  137. data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
  138. data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
  139. data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
  140. data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
  141. data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
  142. data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
  143. data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
  144. data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
  145. data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
  146. data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
  147. data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
  148. data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
  149. data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
  150. data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
  151. data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
  152. data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
  153. data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
  154. data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
  155. data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
  156. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
  157. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
  158. data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
  159. data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
  160. data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
  161. data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
  162. data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
  163. data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
  164. data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
  165. data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
  166. data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
  167. data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
  168. data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
  169. data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
  170. data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
  171. data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
  172. data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
  173. data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
  174. data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
  175. data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
  176. data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
  177. data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
  178. data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
  179. data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
  180. data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
  181. data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
  182. data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
  183. data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
  184. data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
  185. data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
  186. data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
  187. data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
  188. data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
  189. data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
  190. data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
  191. data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
  192. data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
  193. data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
  194. data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
  195. data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
  196. data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
  197. data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
  198. data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
  199. data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
  200. data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
  201. data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
  202. data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
  203. data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
  204. data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
  205. data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
  206. data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
  207. data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
  208. data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
  209. data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
  210. data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
  211. data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
  212. data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
  213. data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
  214. data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
  215. data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
  216. data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
  217. data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
  218. data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
  219. data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
  220. data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
  221. data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
  222. data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
  223. data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
  224. data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
  225. data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
  226. data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
  227. data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
  228. data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
  229. data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
  230. data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
  231. data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
  232. data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
  233. data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
  234. data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
  235. data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
  236. data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
  237. data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
  238. data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
  239. data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
  240. data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
  241. data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
  242. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
  243. data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
  244. data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
  245. data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
  246. data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
  247. data/maps/un-ara-Arab-Latn-1971.imp +137 -0
  248. data/maps/un-ara-Arab-Latn-1972.imp +155 -0
  249. data/maps/un-ara-Arab-Latn-2017.imp +375 -0
  250. data/maps/un-asm-Beng-Latn-1972.imp +188 -0
  251. data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
  252. data/maps/un-ben-Beng-Latn-2016.imp +516 -0
  253. data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
  254. data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
  255. data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
  256. data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
  257. data/maps/un-hin-Deva-Latn-2016.imp +356 -0
  258. data/maps/un-kan-Kana-Latn-2016.imp +214 -0
  259. data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
  260. data/maps/un-mar-Deva-Latn-2016.imp +96 -0
  261. data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
  262. data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
  263. data/maps/un-nep-Deva-Latn-1972.imp +295 -0
  264. data/maps/un-nep-Deva-Latn-2013.imp +62 -0
  265. data/maps/un-ori-Orya-Latn-1972.imp +208 -0
  266. data/maps/un-pan-Guru-Latn-1972.imp +321 -0
  267. data/maps/un-prs-Arab-Latn-1967.imp +214 -0
  268. data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
  269. data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
  270. data/maps/un-tam-Taml-Latn-1972.imp +173 -0
  271. data/maps/un-tel-Telu-Latn-1972.imp +229 -0
  272. data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
  273. data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
  274. data/maps/un-urd-Arab-Latn-1972.imp +290 -0
  275. data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
  276. data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
  277. data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
  278. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
  279. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
  280. data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
  281. data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
  282. data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
  283. data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
  284. data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
  285. data/maps/var-mon-Mong-Latn-1930.imp +101 -0
  286. data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
  287. data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
  288. data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
  289. data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
  290. data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
  291. metadata +335 -0
@@ -0,0 +1,125 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 1981
4
+ language: iso-639-2:arm
5
+ source_script: Armn
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1981 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/810208/ROMANIZATION_OF_ARMENIAN.pdf
9
+ creation_date: 2013
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ The BGN/PCGN system for Armenian was designed for use in romanizing
13
+ names written in the Armenian alphabet. The Roman letters and letter
14
+ combinations shown as equivalents to the Armenian characters reflect
15
+ the eastern variety of Armenian, i.e. the language spoken in the
16
+ Republic of Armenia.
17
+
18
+ notes:
19
+ - The character ե should be romanized ye initially and after the vowel characters ա, ե, է, ը, ի, ո, ու and օ. In all other instances, it should be romanized e.
20
+ - The character ո should be romanized vo initially except in the word ով, which should be romanized ov. In all other instances, it should be romanized o.
21
+ - In Soviet-era sources this upper-case digraph character is found as Եի (Unicode encoding 0535+056B).
22
+ - This lower-case character may be seen either in digraph form as եւ (Unicode encoding 0565+0582) or in single character form as և (Unicode encoding 0587).
23
+ - The characters ԵՎ, եւ and և should be romanized yev initially, in isolation, and after the vowel characters ա, ե, է, ը, ի, ո, ու, and օ. In all other instances these characters should be romanized ev.
24
+ - All apostrophes appearing in Armenian romanization are encoded Unicode 2019.
25
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and lowercase Roman letters as appropriate should be used.
26
+ }
27
+
28
+ tests {
29
+ }
30
+
31
+ # This map has been partially converted by the bin/maps_v1_to_v2 script
32
+ # The section below requires human attention. Remember to remove this
33
+ # comment and move the converted map to 'maps/' directory. Please also
34
+ # take note that the maps-staging directory will be cleaned up whenever
35
+ # you run the bin/maps_v1_to_v2 script. You should particularly be
36
+ # concerned about any regular expressions found in this file and about
37
+ # advanced expressions in parallel {} parts, and also about the order
38
+ # of particular parts of the stage.
39
+
40
+ stage {
41
+
42
+ # CHARACTERS
43
+ parallel {
44
+ sub "\u0531", "A"
45
+ sub "\u0532", "B"
46
+ sub "\u0533", "G"
47
+ sub "\u0534", "D"
48
+ sub "\u0535", "Ye" #treated same as Russian 'ye'
49
+ sub "\u0536", "Z"
50
+ sub "\u0537", "E"
51
+ sub "\u0538", "Y"
52
+ sub "\u0539", "T\u2019"
53
+ sub "\u053a", "Zh"
54
+ sub "\u053b", "I"
55
+ sub "\u053c", "L"
56
+ sub "\u053d", "Kh"
57
+ sub "\u053e", "Ts"
58
+ sub "\u053f", "K"
59
+ sub "\u0540", "H"
60
+ sub "\u0541", "Dz"
61
+ sub "\u0542", "Gh"
62
+ sub "\u0543", "Ch"
63
+ sub "\u0544", "M"
64
+ sub "\u0545", "Y"
65
+ sub "\u0546", "N"
66
+ sub "\u0547", "Sh"
67
+ sub "\u0548", "O" # VO initially and U when in combination with \u0552
68
+ sub "\u0549", "Ch\u2019" # Չ
69
+ sub "\u054a", "P"
70
+ sub "\u054b", "J"
71
+ sub "\u054c", "Rr"
72
+ sub "\u054d", "S"
73
+ sub "\u054e", "V"
74
+ sub "\u054f", "T"
75
+ sub "\u0550", "R"
76
+ sub "\u0551", "Ts\u2019"
77
+ sub "\u0548\u0552", "U"
78
+ sub "\u0548\u0582", "U"
79
+ sub "\u0553", "P\u2019"
80
+ sub "\u0554", "K\u2019"
81
+ sub "\u0555", "O"
82
+ sub "\u0556", "F"
83
+ sub "\u0561", "a"
84
+ sub "\u0562", "b"
85
+ sub "\u0563", "g"
86
+ sub "\u0564", "d"
87
+ sub "\u0565", "e" # ye initially
88
+ sub "\u0566", "z"
89
+ sub "\u0567", "e"
90
+ sub "\u0568", "y"
91
+ sub "\u0569", "t\u2019" # թ
92
+ sub "\u056a", "zh"
93
+ sub "\u056b", "i"
94
+ sub "\u056c", "l"
95
+ sub "\u056d", "kh"
96
+ sub "\u056e", "ts"
97
+ sub "\u056f", "k"
98
+ sub "\u0570", "h"
99
+ sub "\u0571", "dz"
100
+ sub "\u0572", "gh"
101
+ sub "\u0573", "ch"
102
+ sub "\u0574", "m"
103
+ sub "\u0575", "y"
104
+ sub "\u0576", "n"
105
+ sub "\u0577", "sh"
106
+ sub "\u0578", "o" # vo initially and u when in combination with \u0582
107
+ sub "\u0579", "ch\u2019"
108
+ sub "\u057a", "p"
109
+ sub "\u057b", "j"
110
+ sub "\u057c", "rr"
111
+ sub "\u057d", "s"
112
+ sub "\u057e", "v"
113
+ sub "\u057f", "t"
114
+ sub "\u0580", "r"
115
+ sub "\u0581", "ts\u2019"
116
+ sub "\u0578\u0582", "u"
117
+ sub "\u0583", "p\u2019"
118
+ sub "\u0584", "k\u2019"
119
+ sub "\u0585", "o"
120
+ sub "\u0586", "f"
121
+ sub "\u0587", "ev" # yev initially
122
+ }
123
+
124
+ }
125
+
@@ -0,0 +1,111 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 1993
4
+ language: iso-639-2:aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: AZERBAIJANI TABLE OF CORRESPONDENCES CYRILLIC-ROMAN -- BGN/PCGN 1993 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816656/TABLE_OF_CORRESPONDENCES_FOR_AZERBAIJANI.pdf
9
+ creation_date: 1993
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ Azerbaijani, also known as Azeri, is the official language of the Republic of Azerbaijan. In 1991, the Azerbaijani government adopted the Roman alphabet to replace the existing Cyrillic alphabet. The presentation below provides a table of correspondences between the former Cyrillic alphabet and the current Roman alphabet. When Azerbaijani Roman-alphabet spellings are not available, this table can be used to convert Azerbaijani Cyrillic spellings.
13
+
14
+ notes:
15
+
16
+ - The special letter Ə, ə known as schwa, should be reproduced in that form whenever encountered. The characters Ə (Unicode 04D8) and ə (Unicode 04D9) should be used for schwa when writing in the Cyrillic script, but characters Ə (Unicode 018F) and ə (Unicode 0259) should be used when writing in the Roman alphabet. In those instances when it cannot be reproduced, however, the letter Ä ä may be substituted for it (see below).
17
+
18
+ - The obsolete characters й, э, ю, and я should be romanized ẏ, ė, yu., and ya.
19
+
20
+ - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character. It is not known whether there exists an uppercase ‘J’ specific to the Cyrillic character set.
21
+
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters of the basic Roman script is:
24
+ Ğ (U+011E), ğ (U+011F)
25
+ Ə (U+018F), ə (U+0259)
26
+ İ (U+0130), ı (U+0131)
27
+ Ö (U+00D6), ö (U+00F6)
28
+ Ü (U+00DC), ü (U+00FC)
29
+ Ç (U+00C7), ç (U+00E7)
30
+ Ş (U+015E), ş (U+015F)
31
+
32
+ - The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
33
+ }
34
+
35
+ tests {
36
+ test "Азәрбајҹан әлифбасы", "Azərbaycan əlifbası"
37
+ test "Бүтүн инсанлар ләјагәт вә һүгугларына ҝөрә азад бәрабәр доғулурлар.\nОнларын шүурлары вә виҹданлары вар вә бир-бирләринә мүнасибәтдә гардашлыг руһунда давранмалыдырлар.", "Bütün insanlar ləyaqət və hüquqlarına görə azad bərabər doğulurlar.\nOnların şüurları və vicdanları var və bir-birlərinə münasibətdə qardaşlıq ruhunda davranmalıdırlar."
38
+ }
39
+
40
+ stage {
41
+
42
+ # CHARACTERS
43
+ parallel {
44
+ sub "А", "A" # А
45
+ sub "Б", "B" # Б
46
+ sub "В", "V" # В
47
+ sub "Г", "Q" # Г
48
+ sub "Ғ", "Ğ" # Ғ
49
+ sub "Д", "D" # Д
50
+ sub "Е", "E" # Е
51
+ sub "Ә", "Ə" # Ә
52
+ sub "Ж", "J" # Ж
53
+ sub "З", "Z" # З
54
+ sub "И", "İ" # И
55
+ sub "Ы", "I" # Ы
56
+ sub "Ј", "Y" # Ј
57
+ sub "К", "K" # К
58
+ sub "Ҝ", "G" # Ҝ
59
+ sub "Л", "L" # Л
60
+ sub "М", "M" # М
61
+ sub "Н", "N" # Н
62
+ sub "О", "O" # О
63
+ sub "Ө", "Ö" # Ө
64
+ sub "П", "P" # П
65
+ sub "Р", "R" # Р
66
+ sub "С", "S" # С
67
+ sub "Т", "T" # Т
68
+ sub "У", "U" # У
69
+ sub "Ү", "Ü" # Ү
70
+ sub "Ф", "F" # Ф
71
+ sub "Х", "X" # Х
72
+ sub "Һ", "H" # Һ
73
+ sub "Ч", "Ç" # Ч
74
+ sub "Ҹ", "C" # Ҹ
75
+ sub "Ш", "Ş" # Ш
76
+
77
+ sub "а", "a" # а
78
+ sub "б", "b" # б
79
+ sub "в", "v" # в
80
+ sub "г", "q" # г
81
+ sub "ғ", "ğ" # ғ
82
+ sub "д", "d" # д
83
+ sub "е", "e" # е
84
+ sub "ә", "ə" # ә
85
+ sub "ж", "j" # ж
86
+ sub "з", "z" # з
87
+ sub "и", "i" # и
88
+ sub "ы", "ı" # ы
89
+ sub "ј", "y" # ј
90
+ sub "к", "k" # к
91
+ sub "ҝ", "g" # ҝ
92
+ sub "л", "l" # л
93
+ sub "м", "m" # м
94
+ sub "н", "n" # н
95
+ sub "о", "o" # о
96
+ sub "ө", "ö" # ө
97
+ sub "п", "p" # п
98
+ sub "р", "r" # р
99
+ sub "с", "s" # с
100
+ sub "т", "t" # т
101
+ sub "у", "u" # у
102
+ sub "ү", "ü" # ү
103
+ sub "ф", "f" # ф
104
+ sub "х", "x" # х
105
+ sub "һ", "h" # һ
106
+ sub "ч", "ç" # ч
107
+ sub "ҹ", "c" # ҹ
108
+ sub "ш", "ş" # ш
109
+ }
110
+
111
+ }
@@ -0,0 +1,169 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: iso-639-2:bak
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BASHKIR TABLE OF CORRESPONDENCES CYRILLIC-ROMAN BGN/PCGN 2007 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
9
+ creation_date: 2007
10
+ confirmation_date: 2019
11
+ description: |
12
+ Bashkir is an official language within Respublika Bashkortostan, one of the
13
+ republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
14
+ which case it should be romanized by means of the Cyrillic-Roman table of
15
+ correspondences given below.
16
+
17
+ notes:
18
+ - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
19
+ - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
20
+ - The letter w is used between or after vowels.
21
+ - The letter w is used after e, u, ö and ə.
22
+ - |
23
+ An inventory of letter-diacritic combinations, with their Unicode encoding,
24
+ in addition to the unmodified letters of the basic Roman script is:
25
+ Ğ (U+011E) ğ (U+011F)
26
+ Ź (U+0179) ź (U+017A)
27
+ Ë (U+00CB) ë (U+00EB)
28
+ Ñ (U+00D1) ñ (U+00F1)
29
+ Ö (U+00D6) ö (U+00F6)
30
+ Ś (U+015A) ś (U+015B)
31
+ Ü (U+00DC) ü (U+00FC)
32
+ Ç (U+00C7) ç (U+00E7)
33
+ Ş (U+015E) ş (U+015F)
34
+ Ə (U+018F) ə (U+0259)
35
+ - |
36
+ The Roman-script columns show only lowercase forms but, when applying the table,
37
+ uppercase and lowercase Roman letters as appropriate should be used.
38
+ }
39
+
40
+ tests {
41
+ # adopted http://www.eki.ee/knab/lat/kblba.pdf
42
+ test "Васйылға", "Wasyılğa"
43
+ test "Еҙем", "Yeźem"
44
+ test "Раевка", "Raevka"
45
+ test "Сәйетҡол", "Səyetqol"
46
+ test "Ауырғазы", "Awırğazı"
47
+ test "Бурһыҡтау", "Burhıqtaw"
48
+ test "Мәләүез", "Mələwez"
49
+ test "Ҡыҙылъяр", "Qıźılyar"
50
+ # adopted https://en.wikipedia.org/wiki/Bashkir_language#Grammar
51
+ test "кемдең", "kemdeñ"
52
+ test "кем", "kem"
53
+ test "был", "bıl"
54
+ test "ошо", "oşo"
55
+ test "быларҙың", "bılarźıñ"
56
+ test "һеҙҙән", "heźźən"
57
+ test "һин", "hin"
58
+ test "һеҙҙең", "heźźeñ"
59
+ }
60
+
61
+ stage {
62
+
63
+ # RULES (context-sensitive; these run before the plain character table)
64
+ # note[1]: word-initial В/в followed by a vowel is romanized W/w
65
+ sub boundary + "\u0412", "W", after: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
66
+ sub boundary + "\u0432", "w", after: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
67
+ # note[2]: word-initial Е/е is romanized Ye/ye
68
+ sub boundary + "\u0415", "Ye"
69
+ sub boundary + "\u0435", "ye"
70
+ sub "\u0415", "Ye", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя"), after: boundary
71
+ sub "\u0435", "ye", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя"), after: boundary
72
+
73
+ # note[3] # note[4]: У/Ү next to a vowel is romanized W/w
74
+ sub any("\u0423\u04AE"), "W", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
75
+ sub any("\u0443\u04AF"), "w", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
76
+
77
+
78
+
79
+ # CHARACTERS
80
+ parallel {
81
+ sub "\u0410", "A" # А
82
+ sub "\u0411", "B" # Б note[1]
83
+ sub "\u0412", "V" # В
84
+ sub "\u0413", "G" # Г
85
+ sub "\u0492", "Ğ" # Ғ
86
+ sub "\u0414", "D" # Д
87
+ sub "\u0498", "Ź" # Ҙ
88
+ sub "\u0415", "E" # Е note[2]
89
+ sub "\u0401", "Ë" # Ё
90
+ sub "\u0416", "J" # Ж
91
+ sub "\u0417", "Z" # З
92
+ sub "\u0418", "I" # И
93
+ sub "\u0419", "Y" # Й
94
+ sub "\u041A", "K" # К
95
+ sub "\u04A0", "Q" # Ҡ
96
+ sub "\u041B", "L" # Л
97
+ sub "\u041C", "M" # М
98
+ sub "\u041D", "N" # Н
99
+ sub "\u04A2", "Ñ" # Ң
100
+ sub "\u041E", "O" # О
101
+ sub "\u04E8", "Ö" # Ө
102
+ sub "\u041F", "P" # П
103
+ sub "\u0420", "R" # Р
104
+ sub "\u0421", "S" # С
105
+ sub "\u04AA", "Ś" # Ҫ
106
+ sub "\u0422", "T" # Т
107
+ sub "\u0423", "U" # У
108
+ sub "\u04AE", "Ü" # Ү note[3]
109
+ sub "\u0424", "F" # Ф
110
+ sub "\u0425", "X" # Х
111
+ sub "\u04BA", "H" # Һ
112
+ sub "\u0426", "Ts" # Ц
113
+ sub "\u0427", "Ç" # Ч
114
+ sub "\u0428", "Ş" # Ш
115
+ sub "\u0429", "ŞÇ" # Щ
116
+ sub "\u042A", "" # Ъ
117
+ sub "\u042B", "I" # Ы
118
+ sub "\u042C", "" # Ь
119
+ sub "\u042D", "E" # Э
120
+ sub "\u04D8", "Ə" # Ә
121
+ sub "\u042E", "Yu" # Ю
122
+ sub "\u042F", "Ya" # Я
123
+
124
+ sub "\u0430", "a" # а
125
+ sub "\u0431", "b" # б
126
+ sub "\u0432", "v" # в note[1]
127
+ sub "\u0433", "g" # г
128
+ sub "\u0493", "ğ" # ғ
129
+ sub "\u0434", "d" # д
130
+ sub "\u0499", "ź" # ҙ
131
+ sub "\u0435", "e" # e note[2]
132
+ sub "\u0451", "yo" # ё
133
+ sub "\u0436", "j" # ж
134
+ sub "\u0437", "z" # з
135
+ sub "\u0438", "i" # и
136
+ sub "\u0439", "y" # й
137
+ sub "\u043A", "k" # к
138
+ sub "\u04A1", "q" # ҡ
139
+ sub "\u043B", "l" # л
140
+ sub "\u043C", "m" # м
141
+ sub "\u043D", "n" # н
142
+ sub "\u04A3", "ñ" # ң
143
+ sub "\u043E", "o" # о
144
+ sub "\u04E9", "ö" # ө
145
+ sub "\u043F", "p" # п
146
+ sub "\u0440", "r" # р
147
+ sub "\u0441", "s" # с
148
+ sub "\u04AB", "ś" # ҫ
149
+ sub "\u0442", "t" # т
150
+ sub "\u0443", "u" # у
151
+ sub "ү", "ü" # ү note[3]
152
+ sub "\u0444", "f" # ф
153
+ sub "\u0445", "x" # х
154
+ sub "\u04BB", "h" # һ
155
+ sub "\u0446", "ts" # ц
156
+ sub "\u0447", "ç" # ч
157
+ sub "\u0448", "ş" # ш
158
+ sub "\u0449", "şç" # щ
159
+ sub "\u044A", "" # ъ
160
+ sub "\u044B", "ı" # ы
161
+ sub "\u044C", "" # ь
162
+ sub "\u044D", "e" # э
163
+ sub "\u04D9", "ə" # ә
164
+ sub "\u044E", "yu" # ю
165
+ sub "\u044F", "ya" # я
166
+ }
167
+
168
+ }
169
+
@@ -0,0 +1,296 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2008
4
+ language: bal
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF BALUCHI -- BGN/PCGN 2008 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693687/ROMANIZATION_OF_BALUCHI.pdf
9
+ creation_date: 2008
10
+ confirmation_date: 2017-11
11
+ description: |
12
+ The following is the BGN/PCGN-approved romanization
13
+ system for deriving standard spellings of Baluchi
14
+ geographic names. The romanization system is based on
15
+ the Hunterian system of romanization, which has been
16
+ used by the Surveys of India and Pakistan for
17
+ romanizing Baluchi geographic names for more than one
18
+ hundred years. The romanization system is compatible
19
+ with all dialects of Baluchi, including Eastern
20
+ Baluchi, Western Baluchi, and Southern Baluchi.
21
+
22
+ The BGN/PCGN system laid out below includes diacritical
23
+ marks in order that the original script can be derived
24
+ from the romanized form (i.e. it is reversible). For
25
+ desk users requiring a diacritic-free form, these
26
+ diacritics can simply be removed. In almost every case
27
+ the same basic Roman-script characters are kept as are
28
+ used in the Hunterian system. The BGN/PCGN forms have
29
+ further been designed to harmonize with the BGN/PCGN
30
+ Urdu romanization system. In rigorous romanization
31
+ (i.e. including diacritics), retroflexion is marked by
32
+ a sub-dot, and aspiration is marked by an apostrophe,
33
+ where confusion with fricative digraphs could arise.
34
+ For letters used only in Arabic loan words, the
35
+ rigorous forms have further been designed to harmonize
36
+ with the BGN/PCGN Persian romanization system.
37
+
38
+ notes:
39
+ - Occasionally, sequences of /z/ or /s/ plus /h/ may be
40
+ encountered, i.e. z·h, s·h. These may be romanized with the
41
+ Unicode 'center dot' (U+00B7) separating the two letters,
42
+ to distinguish them from the digraphs /zh/ and /sh/.
43
+
44
+ - The character ة is found very rarely in Baluchi, principally in certain Arabic religious terms, e.g. zakāt
45
+ ('alms'). It should be romanized t.
46
+
47
+ - When the letters ال are found, representing the Arabic
48
+ definite article, the ل is assimilated to a following 'sun letter' ت, ث, د, ذ, ر, ز, س, ش, ص, ض, ط, ظ, ل
49
+ or ن and is romanized t, t͟h, d, d͟h, r, z, s, sh, ş, ẕ, ţ, z̧, l, n accordingly.
50
+
51
+ - In romanization, the suffixes ءَ (-ā, singular definite)
52
+ and ءِ (-ay, possessive) are connected to the previous word
53
+ by a hyphen, though they are usually written separately.
54
+
55
+ - The word for 'and', written as و or ءُ, should be
56
+ romanized as -u-, linked by hyphens to the two words it
57
+ connects; e.g.,
58
+ سند و ہند → Sind-u-Hind ('The Gangetic Plain').
59
+
60
+ - Except as specified in notes 4 and 5, word division in romanization should follow word division in the Baluchi script.
61
+
62
+ - Note that the short vowels in the Baluchi examples are not pointed.
63
+
64
+ - Certain initial, medial and final characters are not
65
+ readily available in a Unicode-encoded font in a standalone form.
66
+
67
+ - The Romanization columns show only lowercase forms but,
68
+ when romanizing, uppercase and lowercase Roman letters as
69
+ appropriate should be used.
70
+ }
71
+
72
+ tests {
73
+ # Each `test` pairs a Baluchi source string with its expected romanization; a quoted comment annotates the case that follows it. Cases left commented out (in source/expected form) are blocked by https://github.com/interscript/interscript/issues/620
74
+ # 'cultivable patch of riverbed'
75
+ test "بےنٹَگ", "Benṭag"
76
+ # 'Japan'
77
+ test "جاپان", "Jāpān"
78
+ test "اَرَبِستان", "Arabistān"
79
+ test "بُنجاه", "Bunjāh"
80
+ test "بَلوچِستان", "Balochistān"
81
+ # 'village'
82
+ test "حَلق", "Ḩalq"
83
+ # 'foothills or skirts of a mountain'
84
+ test "دامان", "Dāmān"
85
+ test "ڈاڈَر", "Ḍāḍar"
86
+ # 'tomb'
87
+ test "گُمبُذ", "Gumbud͟h"
88
+ # 'crossroads'
89
+ test "چار راہ", "Chār Rāh"
90
+ # 'market'
91
+ test "بازار", "Bāzār"
92
+ test "سےبِى", "Sebī"
93
+ # - source: اِيشيا
94
+ # expected: Eshyā
95
+ # # 'homeland'
96
+ # - source: وَطَن
97
+ # expected: Waţan
98
+ # 'Bandar Abbas'
99
+ test "عَبّاس", "‘Abbās"
100
+ # 'Taiwan'
101
+ test "فارموسا", "Fārmosā"
102
+ test "ڈاک", "Ḍāk"
103
+ # 'stream, irrigated area, pasture'
104
+ test "مَلّ", "Mall"
105
+ # - source: ہ یرات
106
+ # expected: Herāt
107
+ # 'Philippines'
108
+ test "فِلپائِن", "Filpā’in"
109
+ test "مُرگاپ", "Murgāp"
110
+ # - source: مَرو
111
+ # expected: Marw
112
+ }
113
+
114
+ stage {
115
+ # CHARACTERS
116
+ parallel {
117
+
118
+ # Consonant characters: each rule maps one Arabic-script letter to its romanization
119
+
120
+ sub "\u0628", "b" # ب
121
+ sub "\u067E", "p" # پ
122
+ sub "\u062a", "t" # ت
123
+ sub "\u0679", "ṭ" # see note 8 ٹ
124
+ sub "\u067C", "ṭ" # see note 8 ټ
125
+ sub "\u062B", "t͟h" # see note 8 ث
126
+ sub "\u067F", "t͟h" # see note 8 ٿ
127
+ sub "\u062c", "j" # ج
128
+ sub "\u0686", "ch" # چ
129
+ sub "\u062d", "ḩ" # ح
130
+ sub "\u062e", "kh" # خ
131
+ sub "\u062f", "d" # د
132
+ sub "\u0688", "ḍ" # ڈ
133
+ sub "\u0689", "ḍ" # ډ
134
+ sub "\u0630", "d͟h" # ذ
135
+ sub "\u0631", "r" # ر
136
+ sub "\u0691", "ṛ" # see note 8 ڑ
137
+ sub "\u0693", "ṛ" # see note 8 ړ
138
+ sub "\u0632", "z" # ز
139
+ sub "\u0698", "zh" # ژ
140
+ sub "\u0633", "s" # س
141
+ sub "\u0634", "sh" # ش
142
+ sub "\u0635", "ş" # ص
143
+ sub "\u0636", "ẕ" # ض
144
+ sub "\u0637", "ţ" # ط
145
+ sub "\u0638", "z̧" # ظ
146
+ sub "\u0639", "‘" # ع
147
+ sub "\u063a", "gh" # غ
148
+ sub "\u0641", "f" # ف
149
+ sub "\u0642", "q" # ق
150
+ sub "\u0643", "k" # ك
151
+ sub "\u06A9", "k" # ک
152
+ sub "\u06AF", "g" # گ
153
+ sub "\u0644", "l" # ل
154
+ sub "\u0645", "m" # م
155
+ sub "\u0646", "n" # ن
156
+ sub "\u06BA", "ñ" # ں
157
+ sub "\u0648", "o" # و as a vowel; NOTE(review): U+0648 is also mapped to "w" two rules below - confirm which rule takes effect
158
+
159
+ sub "\u0648", "w" # و as a consonant; same source character as the "o" rule above
160
+ sub "\u0647", "h" # ه
161
+ sub "\u06C1", "h" # ہ (heh goal)
162
+ sub "\u06BE", "h" # ھ (heh doachashmee)
163
+ sub "\u0621", "’" # ء
164
+ sub "\u0626", "’" # ئ
165
+ sub "\u0649", "y" # ى (U+0649, dotless)
166
+ sub "\u064A", "y" # ي
167
+
168
+
169
+ # Aspirated consonants: base letter + ھ (U+06BE). Aspiration is only contrastive in Eastern Baluchi
170
+ sub "\u0628\u06BE", "bh" # بھ
171
+
172
+ # Aspiration is only contrastive in Eastern Baluchi
173
+ sub "\u067E\u06BE", "ph" # پھ
174
+
175
+ # Aspiration is only contrastive in Eastern Baluchi.
176
+ # Apostrophe distinguishes from fricative /th/.
177
+ sub "\u062A\u06BE", "th’" # تھ
178
+
179
+ # Aspiration is only contrastive in Eastern Baluchi
180
+ sub "\u0679\u06BE", "ṭh" # ٹھ
181
+
182
+ # Aspiration is only contrastive in Eastern Baluchi
183
+ sub "\u062C\u06BE", "jh" # جھ
184
+
185
+ # Aspiration is only contrastive in Eastern Baluchi
186
+ sub "\u0686\u06BE", "chh" # چھ
187
+
188
+ # Aspiration is only contrastive in Eastern Baluchi.
189
+ # Apostrophe distinguishes from fricative /dh/. Base letter is د (U+062F), not ح (U+062D).
190
+ sub "\u062F\u06BE", "dh’" # دھ
191
+
192
+ # Aspiration is only contrastive in Eastern Baluchi
193
+ sub "\u0688\u06BE", "ḍh" # ڈھ
194
+
195
+ # Aspiration is only contrastive in Eastern Baluchi. Base letter is retroflex ڑ (U+0691), matching the ṛ output.
196
+ sub "\u0691\u06BE", "\u1E5B\u0068" # ڑھ -> ṛh
197
+
198
+ # Aspiration is only contrastive in Eastern Baluchi.
199
+ # Apostrophe distinguishes from fricative /kh/
200
+ sub "\u06A9\u06BE", "kh’" # کھ
201
+
202
+ # Aspiration is only contrastive in Eastern Baluchi.
203
+ # Apostrophe distinguishes from fricative /gh/
204
+ sub "\u06AF\u06BE", "gh’" # گھ
205
+ sub "\u0644\u0627", "lā" # لا (letter pair; presumably listed because it is written as a ligature)
206
+ sub "\u06A9\u0627", "kā" # کا
207
+ sub "\u06AF\u0627", "gā" # گا
208
+ sub "\u06A9\u0644", "kl" # کل
209
+ sub "\u06AF\u0644", "gl" # گل
210
+
211
+ # Vowels, Diphthongs, and Diacritical Marks (multi-character rules pair a short-vowel mark with the following letter)
212
+ sub "\u0650\u0649", "ī" # kasra (U+0650) + ى (U+0649)
213
+ sub "\u0650", "i" # kasra (U+0650)
214
+ sub "\u06D2", "e" # ے (U+06D2)
215
+ sub boundary + "\u0627", "" # ا directly at a boundary is dropped (the vowel mark it carries supplies the sound)
216
+ sub "\u0627", "ā" # ا elsewhere
217
+ sub "\u0622", "ā" # آ
218
+ sub "\u064E", "a" # fatha (U+064E)
219
+ sub "\u064F", "u" # damma (U+064F)
220
+ sub "\u064F\u0648", "ū" # damma + و
221
+ sub "\u064E\u06D2", "ay" # fatha + ے (U+06D2)
222
+ sub "\u064E\u0648", "aw" # fatha + و
223
+ sub "\u0652", "" # sukun (U+0652): not romanized
224
+ sub "\u0670", "á" # superscript alef (U+0670)
225
+
226
+ sub "\u0628\u0651", "bb" # ب + shadda (U+0651); every rule in this group doubles the consonant's romanization
227
+ sub "\u067E\u0651", "pp" # پ
228
+ sub "\u062a\u0651", "tt" # ت
229
+ sub "\u0679\u0651", "ṭṭ" # see note 8 ٹ
230
+ sub "\u067C\u0651", "ṭṭ" # see note 8 ټ
231
+ sub "\u062B\u0651", "t͟ht͟h" # see note 8 ث
232
+ sub "\u067F\u0651", "t͟ht͟h" # see note 8 ٿ
233
+ sub "\u062c\u0651", "jj" # ج
234
+ sub "\u0686\u0651", "chch" # چ
235
+ sub "\u062d\u0651", "ḩḩ" # ح
236
+ sub "\u062e\u0651", "khkh" # خ
237
+ sub "\u062f\u0651", "dd" # د
238
+ sub "\u0688\u0651", "ḍḍ" # ڈ
239
+ sub "\u0689\u0651", "ḍḍ" # ډ
240
+ sub "\u0630\u0651", "d͟hd͟h" # ذ
241
+ sub "\u0631\u0651", "rr" # ر
242
+ sub "\u0691\u0651", "ṛṛ" # see note 8 ڑ
243
+ sub "\u0693\u0651", "ṛṛ" # see note 8 ړ
244
+ sub "\u0632\u0651", "zz" # ز
245
+ sub "\u0698\u0651", "zhzh" # ژ
246
+ sub "\u0633\u0651", "ss" # س
247
+ sub "\u0634\u0651", "shsh" # ش
248
+ sub "\u0635\u0651", "şş" # ص
249
+ sub "\u0636\u0651", "ẕẕ" # ض
250
+ sub "\u0637\u0651", "ţţ" # ط
251
+ sub "\u0638\u0651", "z̧z̧" # ظ
252
+ sub "\u0639\u0651", "‘‘" # ع
253
+ sub "\u063a\u0651", "ghgh" # غ
254
+ sub "\u0641\u0651", "ff" # ف
255
+ sub "\u0642\u0651", "qq" # ق
256
+ sub "\u0643\u0651", "kk" # ك
257
+ sub "\u06A9\u0651", "kk" # ک
258
+ sub "\u06AF\u0651", "gg" # گ
259
+ sub "\u0644\u0651", "ll" # ل
260
+ sub "\u0645\u0651", "mm" # م
261
+ sub "\u0646\u0651", "nn" # ن
262
+ sub "\u06BA\u0651", "ññ" # ں
263
+ sub "\u0648\u0651", "ww" # و
264
+ sub "\u0647\u0651", "hh" # ه
265
+ sub "\u06C1\u0651", "hh" # ہ
266
+ sub "\u06BE\u0651", "hh" # ھ
267
+ sub "\u0621\u0651", "’’" # ء
268
+ sub "\u0626\u0651", "’’" # ئ
269
+ sub "\u0649\u0651", "yy" # ى (U+0649); NOTE(review): no matching rule for ي (U+064A) + shadda - confirm whether intended
270
+
271
+ sub "\u0621\u064E", "-ā" # ء + fatha: hyphenated suffix, see note 4
272
+ sub "\u0621\u0650", "-ay" # ء + kasra: hyphenated suffix, see note 4
273
+
274
+ # Numerals: each Perso-Arabic digit maps to the corresponding Western digit
275
+ sub "۰", "0"
276
+ sub "۱", "1"
277
+ sub "۲", "2"
278
+ sub "۳", "3"
279
+ sub "۴", "4"
280
+ sub "۵", "5"
281
+ sub "۶", "6"
282
+ sub "۷", "7"
283
+ sub "۸", "8"
284
+ sub "۹", "9"
285
+ # Although Perso-Arabic script is written from right to
286
+ # left, numerical expressions, e.g. ۱۹۶۸ → 1968, are
287
+ # written from left to right. A comma is inserted into
288
+ # longer sequences after thousands, millions, etc.
289
+ }
290
+
291
+
292
+ # POSTRULES: upcase a character in the range a..U+FFFF that sits at a boundary, so romanized words start with a capital. NOTE(review): the not_before clause presumably covers words that begin with ‘, ’ or ' - confirm against the Interscript DSL semantics of before/not_before
293
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
294
+
295
+
296
+ }