interscript-maps 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +28 -0
  3. data/interscript-maps.gemspec +28 -0
  4. data/interscript-maps.yaml +235 -0
  5. data/libs/posix.iml +11 -0
  6. data/libs/unicode.iml +13 -0
  7. data/libs/var-Cyrl.iml +7 -0
  8. data/libs/var-kor.iml +17 -0
  9. data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
  10. data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
  11. data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
  12. data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
  13. data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
  14. data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
  21. data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
  22. data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
  23. data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
  24. data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
  27. data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
  28. data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
  29. data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
  30. data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
  31. data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
  32. data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
  33. data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
  34. data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
  35. data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
  36. data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
  37. data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
  38. data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
  39. data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
  40. data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
  41. data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
  42. data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
  43. data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
  44. data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
  45. data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
  46. data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
  47. data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
  48. data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
  49. data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
  50. data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
  51. data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
  52. data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
  53. data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
  54. data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
  55. data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
  56. data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
  57. data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
  58. data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
  59. data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
  60. data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
  61. data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
  62. data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
  63. data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
  64. data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
  65. data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
  66. data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
  67. data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
  68. data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
  69. data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
  70. data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
  71. data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
  72. data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
  73. data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
  74. data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
  75. data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
  76. data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
  77. data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
  78. data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
  79. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
  80. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
  81. data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
  82. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
  83. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
  84. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
  85. data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
  86. data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
  87. data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
  88. data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
  89. data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
  90. data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
  91. data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
  92. data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
  93. data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
  94. data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
  95. data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
  96. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
  97. data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
  98. data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
  99. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
  100. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
  101. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
  102. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
  103. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
  104. data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
  105. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
  106. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
  107. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
  108. data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
  109. data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
  110. data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
  111. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
  112. data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
  113. data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
  114. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
  115. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
  116. data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
  117. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
  118. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
  119. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
  120. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
  122. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
  123. data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
  124. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
  125. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
  126. data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
  127. data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
  128. data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
  129. data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
  130. data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
  131. data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
  132. data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
  133. data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
  134. data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
  135. data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
  136. data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
  137. data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
  138. data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
  139. data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
  140. data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
  141. data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
  142. data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
  143. data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
  144. data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
  145. data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
  146. data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
  147. data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
  148. data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
  149. data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
  150. data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
  151. data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
  152. data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
  153. data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
  154. data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
  155. data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
  156. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
  157. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
  158. data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
  159. data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
  160. data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
  161. data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
  162. data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
  163. data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
  164. data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
  165. data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
  166. data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
  167. data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
  168. data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
  169. data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
  170. data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
  171. data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
  172. data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
  173. data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
  174. data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
  175. data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
  176. data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
  177. data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
  178. data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
  179. data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
  180. data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
  181. data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
  182. data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
  183. data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
  184. data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
  185. data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
  186. data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
  187. data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
  188. data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
  189. data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
  190. data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
  191. data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
  192. data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
  193. data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
  194. data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
  195. data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
  196. data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
  197. data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
  198. data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
  199. data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
  200. data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
  201. data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
  202. data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
  203. data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
  204. data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
  205. data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
  206. data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
  207. data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
  208. data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
  209. data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
  210. data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
  211. data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
  212. data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
  213. data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
  214. data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
  215. data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
  216. data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
  217. data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
  218. data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
  219. data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
  220. data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
  221. data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
  222. data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
  223. data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
  224. data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
  225. data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
  226. data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
  227. data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
  228. data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
  229. data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
  230. data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
  231. data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
  232. data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
  233. data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
  234. data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
  235. data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
  236. data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
  237. data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
  238. data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
  239. data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
  240. data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
  241. data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
  242. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
  243. data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
  244. data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
  245. data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
  246. data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
  247. data/maps/un-ara-Arab-Latn-1971.imp +137 -0
  248. data/maps/un-ara-Arab-Latn-1972.imp +155 -0
  249. data/maps/un-ara-Arab-Latn-2017.imp +375 -0
  250. data/maps/un-asm-Beng-Latn-1972.imp +188 -0
  251. data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
  252. data/maps/un-ben-Beng-Latn-2016.imp +516 -0
  253. data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
  254. data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
  255. data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
  256. data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
  257. data/maps/un-hin-Deva-Latn-2016.imp +356 -0
  258. data/maps/un-kan-Kana-Latn-2016.imp +214 -0
  259. data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
  260. data/maps/un-mar-Deva-Latn-2016.imp +96 -0
  261. data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
  262. data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
  263. data/maps/un-nep-Deva-Latn-1972.imp +295 -0
  264. data/maps/un-nep-Deva-Latn-2013.imp +62 -0
  265. data/maps/un-ori-Orya-Latn-1972.imp +208 -0
  266. data/maps/un-pan-Guru-Latn-1972.imp +321 -0
  267. data/maps/un-prs-Arab-Latn-1967.imp +214 -0
  268. data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
  269. data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
  270. data/maps/un-tam-Taml-Latn-1972.imp +173 -0
  271. data/maps/un-tel-Telu-Latn-1972.imp +229 -0
  272. data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
  273. data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
  274. data/maps/un-urd-Arab-Latn-1972.imp +290 -0
  275. data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
  276. data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
  277. data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
  278. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
  279. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
  280. data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
  281. data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
  282. data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
  283. data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
  284. data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
  285. data/maps/var-mon-Mong-Latn-1930.imp +101 -0
  286. data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
  287. data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
  288. data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
  289. data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
  290. data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
  291. metadata +335 -0
@@ -0,0 +1,136 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 1965
4
+ language: iso-639-2:ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1965 System
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816788/ROMANIZATION_OF_UKRAINIAN.pdf
9
+ creation_date: 1947
10
+ confirmation_date: 2019-06
11
+ description: |
12
+ The BGN/PCGN system for Ukrainian was designed for use in romanizing
13
+ names written in the Ukrainian alphabet. The Ukrainian alphabet
14
+ contains five characters not present in the Russian alphabet: ґ, є, і,
15
+ ї, and ’.
16
+
17
+ notes:
18
+ - The character sequences з г, к г, с г, т с and ц г may be romanized z∙h, k∙h, s∙h, t∙s and ts∙h in order to differentiate those romanizations from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render the characters ж, х, ш, ц, and the character sequence тш.
19
+ - All apostrophes appearing in romanization are Unicode encoding 2019.
20
+ - The Roman‐script columns show only lowercase forms but, when applying the table, uppercase and lowercase Roman letters as appropriate should be used.
21
+ }
22
+
23
+ tests {
24
+ test "Авдіївська Міськрада", "Avdiyivs’ka Mis’krada"
25
+ test "Бабаї", "Babayi"
26
+ test "Віленька", "Vilen’ka"
27
+ test "Гагарінський Район", "Haharins’kyy Rayon"
28
+ test "Довбушева Криниця", "Dovbusheva Krynytsya"
29
+ test "Дідівщина", "Didivshchyna"
30
+ test "Економічна", "Ekonomichna"
31
+ test "Єфросинівка", "Yefrosynivka"
32
+ test "Жигуліна Роща", "Zhyhulina Roshcha"
33
+ test "Загір’я", "Zahir”ya"
34
+ test "З’єднувальний Канал", "Z”yednuval’nyy Kanal"
35
+ test "Ивахи", "Yvakhy"
36
+ test "Івано-Франківська Міськрада", "Ivano-Frankivs’ka Mis’krada"
37
+ test "Їжаківка", "Yizhakivka"
38
+ test "Йосиповичі", "Yosypovychi"
39
+ test "Кабичівка", "Kabychivka"
40
+ test "Лазуровий Провулок", "Lazurovyy Provulok"
41
+ test "Мала Сейдеминуха", "Mala Seydemynukha"
42
+ test "Нагірний", "Nahirnyy"
43
+ test "Овер’янівське Озеро", "Over”yanivs’ke Ozero"
44
+ test "Павлопільське Водосховище", "Pavlopil’s’ke Vodoskhovyshche"
45
+ test "Приґородний", "Prygorodnyy"
46
+ test "Радгосп Правда", "Radhosp Pravda"
47
+ test "Садово-Хрустальненський", "Sadovo-Khrustal’nens’kyy"
48
+ test "Таратутине", "Taratutyne"
49
+ test "Улу-Узень", "Ulu-Uzen’"
50
+ test "Христофорівка", "Khrystoforivka"
51
+ test "Центральна Вулиця", "Tsentral’na Vulytsya"
52
+ test "Чайковичі", "Chaykovychi"
53
+ test "Шалаші", "Shalashi"
54
+ test "Щербинівка", "Shcherbynivka"
55
+ test "Южноукраїнська Міськрада", "Yuzhnoukrayins’ka Mis’krada"
56
+ test "Ясениця", "Yasenytsya"
57
+ }
58
+
59
+ stage {
60
+
61
+ # RULES
62
+ sub boundary + "\u2019" + boundary, "”" # ’ in the middle of a word -> ”
63
+
64
+
65
+ # CHARACTERS
66
+ parallel {
67
+ sub "а", "a"
68
+ sub "б", "b"
69
+ sub "в", "v"
70
+ sub "г", "h"
71
+ sub "д", "d"
72
+ sub "е", "e"
73
+ sub "ж", "zh"
74
+ sub "з", "z"
75
+ sub "и", "y"
76
+ sub "й", "y"
77
+ sub "к", "k"
78
+ sub "л", "l"
79
+ sub "м", "m"
80
+ sub "н", "n"
81
+ sub "о", "o"
82
+ sub "п", "p"
83
+ sub "р", "r"
84
+ sub "с", "s"
85
+ sub "т", "t"
86
+ sub "у", "u"
87
+ sub "ф", "f"
88
+ sub "х", "kh"
89
+ sub "ц", "ts"
90
+ sub "ч", "ch"
91
+ sub "ш", "sh"
92
+ sub "щ", "shch"
93
+ sub "ь", "’"
94
+ sub "ю", "yu"
95
+ sub "я", "ya"
96
+ sub "є", "ye"
97
+ sub "і", "i"
98
+ sub "ї", "yi"
99
+ sub "ґ", "g"
100
+ sub "", " "
101
+ sub "Є", "Ye"
102
+ sub "І", "I"
103
+ sub "Ї", "Yi"
104
+ sub "А", "A"
105
+ sub "Б", "B"
106
+ sub "В", "V"
107
+ sub "Г", "H"
108
+ sub "Д", "D"
109
+ sub "Е", "E"
110
+ sub "Ж", "Zh"
111
+ sub "З", "Z"
112
+ sub "И", "Y"
113
+ sub "Й", "Y"
114
+ sub "К", "K"
115
+ sub "Л", "L"
116
+ sub "М", "M"
117
+ sub "Н", "N"
118
+ sub "О", "O"
119
+ sub "П", "P"
120
+ sub "Р", "R"
121
+ sub "С", "S"
122
+ sub "Т", "T"
123
+ sub "У", "U"
124
+ sub "Ф", "F"
125
+ sub "Х", "Kh"
126
+ sub "Ц", "Ts"
127
+ sub "Ч", "Ch"
128
+ sub "Ш", "Sh"
129
+ sub "Щ", "Shch"
130
+ sub "Ь", "’"
131
+ sub "Ю", "Yu"
132
+ sub "Я", "Ya"
133
+ sub "Ґ", "G"
134
+ }
135
+
136
+ }
@@ -0,0 +1,88 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2019
4
+ language: iso-639-2:ukr
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: BGN/PCGN 2019 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/864314/ROMANIZATION_OF_UKRAINIAN.pdf
9
+ creation_date: 2019
10
+ confirmation_date: 2020-01
11
+ description: |
12
+ The BGN/PCGN system for Ukrainian was designed for use in romanizing names written
13
+ in the Ukrainian alphabet. It is an adoption of the Ukrainian national system in use
14
+ since 2010, and supersedes the BGN/PCGN 1965 System for Ukrainian.
15
+
16
+ notes:
17
+ - |
18
+ The 2019 system was adopted by BGN and PCGN after monitoring a good level of implementation
19
+ of the national system within Ukraine. Note, however, that this system is not recommended for
20
+ reverse transliteration; take caution when attempting to convert a romanized name back into Ukrainian.
21
+ This system also lacks the methodology outlined in the 1965 System to provide additional
22
+ differentiation between digraphs and individual character sequences.
23
+ For example, unlike the 1965 System, the 2019 System doesn’t differentiate the special character
24
+ sequences зг, кг, сг, тс, and цг (previously romanized as z∙h, k∙h, s∙h, t∙s, and ts∙h)
25
+ from the digraphs zh, kh, sh, ts, and the letter sequence tsh, which are used to render
26
+ the characters ж, х, ш, ц and the character sequence тш.
27
+ - To use the keyboard Unicode function, hold ALT and enter the sequence listed in the table.
28
+ - The character sequence З Г, previously romanized as zh, is romanized zgh under the 2019 system.
29
+ - These characters differ significantly in romanization from the BGN/PCGN 1965 system.
30
+ }
31
+
32
+ tests {
33
+ test "Алушта", "Alushta"
34
+ test "Борщагівка", "Borshchahivka"
35
+ test "Вишгород", "Vyshhorod"
36
+ test "Гадяч", "Hadiach"
37
+ test "Згорани", "Zghorany"
38
+ test "Ґалаґан", "Galagan"
39
+ test "Дон", "Don"
40
+ test "Рівне", "Rivne"
41
+ test "Єнакієве", "Yenakiieve"
42
+ test "Наєнко", "Naienko"
43
+ test "Житомир", "Zhytomyr"
44
+ test "Запоріжжя", "Zaporizhzhia"
45
+ test "Закарпаття", "Zakarpattia"
46
+ test "Медвин", "Medvyn"
47
+ test "Іршава", "Irshava"
48
+ test "Їжакевич", "Yizhakevych"
49
+ test "Кадіївка", "Kadiivka"
50
+ test "Йосипівка", "Yosypivka"
51
+ test "Стрий", "Stryi"
52
+ test "Київ", "Kyiv"
53
+ test "Лебедин", "Lebedyn"
54
+ test "Миколаїв", "Mykolaiv"
55
+ test "Ніжин", "Nizhyn"
56
+ test "Одеса", "Odesa"
57
+ test "Полтава", "Poltava"
58
+ test "Ромни", "Romny"
59
+ test "Суми", "Sumy"
60
+ test "Тетерів", "Teteriv"
61
+ test "Ужгород", "Uzhhorod"
62
+ test "Фастів", "Fastiv"
63
+ test "Харків", "Kharkiv"
64
+ test "Біла Церква", "Bila Tserkva"
65
+ test "Чернівці", "Chernivtsi"
66
+ test "Шостка", "Shostka"
67
+ test "Гоща", "Hoshcha"
68
+ test "Русь", "Rus"
69
+ test "Юрій", "Yurii"
70
+ test "Крюківка", "Kriukivka"
71
+ test "Яготин", "Yahotyn"
72
+ test "Ічня", "Ichnia"
73
+ test "Знам’янка", "Znamianka"
74
+ }
75
+
76
+ dependency "un-ukr-Cyrl-Latn-2012", as: cyrllatn
77
+
78
+
79
+ stage {
80
+
81
+ run map.cyrllatn.stage.main
82
+
83
+ # CHARACTERS
84
+ parallel {
85
+ sub "'", "" # ' ->
86
+ }
87
+
88
+ }
@@ -0,0 +1,333 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: iso-639-2:urd
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: BGN/PCGN Romanization System -- Urdu (2007)
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693788/ROMANIZATION_OF_URDU.pdf
9
+ creation_date: 2007
10
+ confirmation_date: 2017-11
11
+ description: |
12
+ The following is the approved romanization system for
13
+ deriving standard spellings of Urdu geographical names for
14
+ Pakistan. It was jointly adopted by BGN and PCGN at the
15
+ 23rd BGN/PCGN Conference in Washington, DC, in 2007 and it
16
+ is based on the Hunterian romanization system for Urdu,
17
+ which has been used by the Surveys of India and Pakistan
18
+ for romanizing Urdu geographical names for more than one
19
+ hundred years. The BGN/PCGN system laid out below includes
20
+ diacritical marks in order that the original script can be
21
+ derived from the romanized form (i.e. it is reversible).
22
+ For desk users requiring a diacritic-free form, these
23
+ diacritics can simply be removed. In every case the same
24
+ basic Roman-script characters are kept as are used in the
25
+ Hunterian system. The BGN/PCGN forms have further been
26
+ designed to harmonize with the BGN/PCGN Persian
27
+ romanization system.
28
+ notes:
29
+ - 1. When the vowel sign zīr ( ِ) occurs word-finally in the
30
+ first element of a compound, it is assumed to mark the
31
+ Persian izafat
32
+ morpheme, and is romanized -e, not i.
33
+ - 2. The source of almost all example names is the 1951
34
+ Census of Pakistan, Village List, Northwest Frontier
35
+ Province, Chitral
36
+ State. Office of the Provincial Superintendent of Census,
37
+ North-West Frontier Province, Peshawar.
38
+ - 3. No examples of aspirated dental r (rh, رھ) were found,
39
+ though this phoneme is assumed to be part of the phonology
40
+ of
41
+ Urdu, and was therefore left out of Table 2.
42
+ - 4. Note that the short vowels in the Urdu examples are not
43
+ pointed.
44
+ - 5. Occasionally, sequences of /z/ or /s/ plus /h/ may be
45
+ encountered, i.e. z·h, s·h. These may be romanized with the
46
+ Unicode
47
+ 'center dot' (U+00B7) separating the two letters, to
48
+ distinguish them from the digraphs /zh/ and /sh/.
49
+ - Commented tests are blocked by this issue https://github.com/interscript/interscript/issues/572
50
+ depends on the different ways of handling ي to y or e AND و to u or o
51
+ }
52
+
53
+ tests {
54
+ # - source: بوغدِی
55
+ # expected: Boghdī
56
+ test "پَالِير", "Pālīr"
57
+ # - source: بیزوت كَلے
58
+ # expected: Bezot Kale
59
+ # - source: عَمَل كوٹ
60
+ # expected: ‘Amal Koṭ
61
+ test "ثَابِر", "S̄ābir"
62
+ test "شَاه نَثَار ميلة", "Shāh Nas̄ār Mylah"
63
+ # - source: بَرجُو ميلَه
64
+ # expected: Barjū Melah
65
+ test "چَپرِی", "Chaprī"
66
+ test "أَحمَد خَان كَلے", "Aḩmad Khān Kale"
67
+ # - source: آكَا خيل
68
+ # expected: Ākā Khel
69
+ test "دُرَانِي", "Durānī"
70
+ test "ڈَنگِیلا", "Ḍangīlā"
71
+ test "ذَرَانِی", "Z̄arānī"
72
+ test "بُركِي", "Burkī"
73
+ test "گِیدَڑَه", "Gīdaṛah"
74
+ test "عَلِي زَائِي", "‘Alī Zā’ī"
75
+ # - source: ژوب
76
+ # expected: Zhob
77
+ test "بِسَاتُو", "Bisātū"
78
+ test "أَحمَدِي شَامَا", "Aḩmadī Shāmā"
79
+ test "اَصَالَت كَلے", "Aşālat Kale"
80
+ test "خَضَر خَان", "Khaẕar Khān"
81
+ test "سُلْطَان", "Sulţān"
82
+ test "عَزَم سَيِّد نُور كَلے", "‘Azam Sayyid Nūr Kale"
83
+ # - source: عَلَم شير
84
+ # expected: ‘Alam Sher
85
+ test "بغَاكِي", "Bghākī"
86
+ # - source: مُظَفَر كوٹ
87
+ # expected: Muz̧afar Koṭ
88
+ test "حَقدَرَه", "Ḩaqdarah"
89
+ test "کَچکِینَہ", "Kachkīnah"
90
+ test "بَاگَن", "Bāgan"
91
+ test "بُلبَلَک", "Bulbalak"
92
+ test "بِلیَامِین", "Bilyāmīn"
93
+ test "نَہر", "Nahr"
94
+ # - source: جوکَالِیَاں
95
+ # expected: Jokālīāñ
96
+ test "اَرَوْالِی", "Arawālī"
97
+ # - source: هیروشاه
98
+ # expected: Heroshāh
99
+ test "مَہردِی", "Mahrdī"
100
+ test "بَڑھ", "Baṛh"
101
+ # - source: شِیوَاؤ
102
+ # expected: Shīwā’o
103
+ test "یَاردَا کَلے", "Yārdā Kale"
104
+ test "بهَائِي خَان", "Bhā’ī Khān"
105
+ test "پھاشک", "Phāshk"
106
+ test "تھَلّ", "Thall"
107
+ test "پَٹھان ريَا", "Paṭhān Ryā"
108
+ test "جھِیل", "Jhīl"
109
+ test "غَزْنِي سْپِين", "Ghaznī Spīn"
110
+ test "بَادشَاه چھُم", "Bādshāh Chhum"
111
+ test "سِندھ", "Sindh"
112
+ test "ڈھَنڈ", "Ḍhanḍ"
113
+ # - source: غوزگَڑھِی
114
+ # expected: Ghozgaṛhī
115
+ # - source: دوغَل گاکھَر
116
+ # expected: Doghal Gākhar
117
+ test "خَان گھَڑِی", "Khān Ghaṛī"
118
+ test "غُلَامَک كَلے", "Ghulāmak Kale"
119
+ # - source: کاراخیل
120
+ # expected: Kārākhel
121
+ test "خَپیَنگا", "Khapyangā"
122
+ test "گَندَه كَلے", "Gandah Kale"
123
+ # - source: گُلونَا ڈھيرِي
124
+ # expected: Gulonā Ḍherī
125
+ # - source: خيرَه دِين
126
+ # expected: Kherah Dīn
127
+ test "مَورپِتھِی", "Maurpithī"
128
+ test "درے پلارِی", "Dre Plārī"
129
+ test "آگرَہ", "Āgrah"
130
+ test "ڈَنڈَر", "Ḍanḍar"
131
+ # - source: گِیدو
132
+ # expected: Gīdo
133
+ test "گُبازانَہ", "Gubāzānah"
134
+ # - source: اُوشو
135
+ # expected: Ūsho
136
+ test "حَےدَر عَلِی كَلے", "Ḩaidar ‘Alī Kale"
137
+ test "تَودَہ چِینَہ", "Taudah Chīnah"
138
+ test "مُوسى خَان كَلے", "Mūsá Khān Kale"
139
+ test "مُلَّا بَاغ", "Mullā Bāgh"
140
+ }
141
+
142
+ stage {
143
+
144
+ # CHARACTERS
145
+ parallel {
146
+ # special rules
147
+
148
+ sub space, "", after: "\u0622\u0628\u064E\u0627\u062F" # space followed by abad is removed
149
+ sub "\ufdf2", "Allāh" # See note 5
150
+
151
+ # Vowels, Diphthongs, and Diacritical Marks
152
+ sub "\u064e", "a" # َ fatha
153
+ sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
154
+ sub "\u064e", "", after: "a" + any("h|t") # َ fatha followed by ta' marboota, handling different order of conversion
155
+
156
+ sub "\u0652", "" # ْ sokoon
157
+ sub "\u0659", "ê"
158
+
159
+ sub "\u0650" + any("\u064a|\u06cc"), "ī" # ـِي kasra followed by ي
160
+ sub "\u0650", "i" # karsra
161
+ sub "\u06d2", "e" # ـے
162
+
163
+ sub "\u0622", "ā" # آ
164
+ sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
165
+ sub "\u0627", "ā" # ا
166
+ sub boundary + "\u0627", "" # ا
167
+
168
+ sub "\u0648", "o" # و # suspect
169
+ sub "\u064f", "u" # ُ damma
170
+ sub "\u064f\u0648", "ū" # ـُو damma followed by و
171
+
172
+ sub "\u064e\u06d2", "ai" # ـے
173
+ sub "\u064e\u0648", "au" # ـَو
174
+ sub "\u064e\u064a\u0651", "ayy" # ـَو
175
+
176
+ sub "\u0670", "á" # ىٰ
177
+ sub "\u0649", "á" # ىٰ
178
+
179
+ # shadda
180
+ sub "\u0628\u0651", "bb" # ب
181
+ sub "\u062a\u0651", "tt" # ت
182
+ sub "\u062b\u0651", "thth" # ث
183
+ sub "\u062c\u0651", "jj" # ج
184
+ sub "\u062d\u0651", "ẖẖ" # ح
185
+ sub "\u062e\u0651", "khkh" # خ
186
+ sub "\u062f\u0651", "dd" # د
187
+ sub "\u0630\u0651", "z̄z̄" # ذ
188
+ sub "\u0631\u0651", "rr" # ر
189
+ sub "\u0632\u0651", "zz" # ز
190
+ sub "\u0633\u0651", "ss" # س
191
+ sub "\u0634\u0651", "sh" # ش
192
+ sub "\u0635\u0651", "şş" # ص
193
+ sub "\u0636\u0651", "ḏḏ" # ض
194
+ sub "\u0637\u0651", "ţţ" # ط
195
+ sub "\u0638\u0651", "z̧z̧" # ظ
196
+ sub "\u063a\u0651", "ghgh" # غ
197
+ sub "\u0641\u0651", "ff" # ف
198
+ sub "\u0642\u0651", "qq" # ق
199
+ sub "\u0643\u0651", "kk" # ك
200
+ sub "\u0644\u0651", "ll" # ل
201
+ sub "\u0645\u0651", "mm" # م
202
+ sub "\u0646\u0651", "nn" # ن
203
+ sub "\u0647\u0651", "hh" # ه
204
+ sub "\u0648\u0651", "ww" # و
205
+ sub any("\u064a|\u06cc") + "\u0651", "yy" # ي
206
+ sub "\u064e\u064a", "yy" # ي
207
+
208
+ # NOTE 1
209
+ sub "\u0650" + boundary, "-e" # ِ kasra
210
+ sub "\u0674", "-e" # ٴ
211
+ sub "\u0654", "-e" # ٔ
212
+
213
+ sub "\u0650\u064a\u0651\u064e", "īy" # ـِيَّ
214
+ sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
215
+ sub "\u064e\u0649", "ay" # ـَى fatha followed by ى which is ا not ي
216
+ sub "\u064e\u0648\u0652", "aw" # ـَوْ
217
+ sub "\u064e\u064a\u0652", "ay" # ـَيْ
218
+ sub "\u0650\u06cc\u0651\u064e", "īy" # ـِيَّ
219
+ sub "\u064e\u064a", "aī" # ـَي
220
+ sub "\u064e\u06cc", "aī" # ـَي
221
+ # - '-ye'
222
+
223
+
224
+ # ta' marboota
225
+ sub "\u0629", "at" # ة in the middle of the sentence
226
+ sub "\u0629" + line_end, "ah"
227
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
228
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
229
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
230
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
231
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
232
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
233
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
234
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
235
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
236
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
237
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
238
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
239
+
240
+
241
+
242
+ sub "\u0621", "’" # ء
243
+ sub "\u0624", "’" # ؤ
244
+ sub "\u0624" + boundary, "’o" # ؤ
245
+ sub "\u0626", "’" # ئ
246
+
247
+ sub "\u0623", "" # أ
248
+ sub "\u0625", "" # إ
249
+ # See note B
250
+ sub boundary + "\u0627\u0644", "al " # ال
251
+ # '\uFE8E' : '' # ﺎ
252
+
253
+ # Sun letters
254
+ sub boundary + "\u0627\u0644\u062a", "at t" # الت
255
+ sub boundary + "\u0627\u0644\u062b", "as̄ s̄" # الث
256
+ sub boundary + "\u0627\u0644\u062f", "ad d" # الد
257
+ sub boundary + "\u0627\u0644\u0630", "az̄ z̄" # الذ
258
+ sub boundary + "\u0627\u0644\u0631", "ar r" # الر
259
+ sub boundary + "\u0627\u0644\u0632", "az z" # الز
260
+ sub boundary + "\u0627\u0644\u0633", "as s" # الس
261
+ sub boundary + "\u0627\u0644\u0634", "ash sh" # الش
262
+ sub boundary + "\u0627\u0644\u0635", "aş ş" # الص
263
+ sub boundary + "\u0627\u0644\u0636", "aẕ ẕ" # الض
264
+ sub boundary + "\u0627\u0644\u0637", "aţ ţ" # الط
265
+ sub boundary + "\u0627\u0644\u0638", "az̧ z̧" # الظ
266
+ sub boundary + "\u0627\u0644\u0644", "al l" # الل
267
+ sub boundary + "\u0627\u0644\u0646", "an n" # الن
268
+
269
+
270
+ # consonant characters
271
+
272
+ sub "\u0628", "b" # ب
273
+ sub "\u067E", "p" # پ
274
+ sub "\u062a", "t" # ت
275
+ sub "\u0679", "ṭ" # ٹ
276
+ sub "\u062B", "s̄" # ث
277
+ sub "\u062c", "j" # ج
278
+ sub "\u0686", "ch" # ‫چ‬
279
+ sub "\u062d", "ḩ" # ح
280
+ sub "\u062e", "kh" # خ
281
+ sub "\u062f", "d" # د
282
+ sub "\u0688", "ḍ" # ‫ڈ
283
+ sub "\u0630", "z̄" # ذ
284
+ sub "\u0631", "r" # ر
285
+ sub "\u0691", "ṛ" # ڑ
286
+ sub "\u0632", "z" # ز
287
+ sub "\u0698", "zh" # ‫ژ‬
288
+ sub "\u0633", "s" # س
289
+ sub "\u0634", "sh" # ش
290
+ sub "\u0635", "ş" # ص
291
+ sub "\u0636", "ẕ" # ض
292
+ sub "\u0637", "ţ" # ط
293
+ sub "\u0638", "z̧" # ظ
294
+ sub "\u0639", "‘" # ع
295
+ sub "\u063a", "gh" # غ
296
+ sub "\u0641", "f" # ف
297
+ sub "\u0642", "q" # ق
298
+ sub "\u0643", "k" # ك
299
+ sub "\u06A9", "k" # ک
300
+ sub "\u06AF", "g" # ‫گ‬
301
+ sub "\u0644", "l" # ل
302
+ sub "\u0645", "m" # م
303
+ sub "\u0646", "n" # ن
304
+ sub "\u06BA", "ñ" # ڼ
305
+ sub any("\u0647\u06c1\u06be"), "h" # ه
306
+ sub "\u0648", "w" # و
307
+ sub any("\u064a\u06cc"), "y" # ي
308
+ # '\u0649' : 'y' # ي
309
+ sub "\u06D0", "ē" # ې
310
+ sub "\u06CD", "êy" # ‫ۍ‬
311
+ }
312
+
313
+ # POSTRULES
314
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'-")
315
+ # don't capitalize the definite article in the middle of a sentence
316
+ sub " At T", " at T" # الت
317
+ sub " As̄ S̄", " as̄ S̄" # الث
318
+ sub " Ad D", " ad D" # الد
319
+ sub " Az̄ Z̄", " az̄ Z̄" # الذ
320
+ sub " Ar R", " ar R" # الر
321
+ sub " Az Z", " az Z" # الز
322
+ sub " As S", " as S" # الس
323
+ sub " Ash Sh", " ash Sh" # الش
324
+ sub " Aş Ş", " aş Ş" # الص
325
+ sub " Aẕ Ẕ", " aẕ Ẕ" # الض
326
+ sub " Aţ Ţ", " aţ Ţ" # الط
327
+ sub " Az̧ Z̧", " az̧ Z̧" # الظ
328
+ sub " Al L", " al L" # الل
329
+ sub " An N", " an N" # الن
330
+ sub " Al ", " al " # ال
331
+
332
+ compose
333
+ }