interscript-maps 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +28 -0
  3. data/interscript-maps.gemspec +28 -0
  4. data/interscript-maps.yaml +235 -0
  5. data/libs/posix.iml +11 -0
  6. data/libs/unicode.iml +13 -0
  7. data/libs/var-Cyrl.iml +7 -0
  8. data/libs/var-kor.iml +17 -0
  9. data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
  10. data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
  11. data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
  12. data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
  13. data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
  14. data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
  21. data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
  22. data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
  23. data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
  24. data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
  27. data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
  28. data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
  29. data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
  30. data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
  31. data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
  32. data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
  33. data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
  34. data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
  35. data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
  36. data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
  37. data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
  38. data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
  39. data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
  40. data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
  41. data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
  42. data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
  43. data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
  44. data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
  45. data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
  46. data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
  47. data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
  48. data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
  49. data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
  50. data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
  51. data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
  52. data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
  53. data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
  54. data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
  55. data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
  56. data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
  57. data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
  58. data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
  59. data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
  60. data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
  61. data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
  62. data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
  63. data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
  64. data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
  65. data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
  66. data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
  67. data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
  68. data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
  69. data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
  70. data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
  71. data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
  72. data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
  73. data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
  74. data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
  75. data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
  76. data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
  77. data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
  78. data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
  79. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
  80. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
  81. data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
  82. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
  83. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
  84. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
  85. data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
  86. data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
  87. data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
  88. data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
  89. data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
  90. data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
  91. data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
  92. data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
  93. data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
  94. data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
  95. data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
  96. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
  97. data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
  98. data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
  99. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
  100. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
  101. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
  102. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
  103. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
  104. data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
  105. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
  106. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
  107. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
  108. data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
  109. data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
  110. data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
  111. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
  112. data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
  113. data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
  114. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
  115. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
  116. data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
  117. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
  118. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
  119. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
  120. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
  122. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
  123. data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
  124. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
  125. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
  126. data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
  127. data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
  128. data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
  129. data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
  130. data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
  131. data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
  132. data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
  133. data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
  134. data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
  135. data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
  136. data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
  137. data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
  138. data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
  139. data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
  140. data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
  141. data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
  142. data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
  143. data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
  144. data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
  145. data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
  146. data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
  147. data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
  148. data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
  149. data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
  150. data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
  151. data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
  152. data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
  153. data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
  154. data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
  155. data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
  156. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
  157. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
  158. data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
  159. data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
  160. data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
  161. data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
  162. data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
  163. data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
  164. data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
  165. data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
  166. data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
  167. data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
  168. data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
  169. data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
  170. data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
  171. data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
  172. data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
  173. data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
  174. data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
  175. data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
  176. data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
  177. data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
  178. data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
  179. data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
  180. data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
  181. data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
  182. data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
  183. data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
  184. data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
  185. data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
  186. data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
  187. data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
  188. data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
  189. data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
  190. data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
  191. data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
  192. data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
  193. data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
  194. data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
  195. data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
  196. data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
  197. data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
  198. data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
  199. data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
  200. data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
  201. data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
  202. data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
  203. data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
  204. data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
  205. data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
  206. data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
  207. data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
  208. data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
  209. data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
  210. data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
  211. data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
  212. data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
  213. data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
  214. data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
  215. data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
  216. data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
  217. data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
  218. data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
  219. data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
  220. data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
  221. data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
  222. data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
  223. data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
  224. data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
  225. data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
  226. data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
  227. data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
  228. data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
  229. data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
  230. data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
  231. data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
  232. data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
  233. data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
  234. data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
  235. data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
  236. data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
  237. data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
  238. data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
  239. data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
  240. data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
  241. data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
  242. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
  243. data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
  244. data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
  245. data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
  246. data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
  247. data/maps/un-ara-Arab-Latn-1971.imp +137 -0
  248. data/maps/un-ara-Arab-Latn-1972.imp +155 -0
  249. data/maps/un-ara-Arab-Latn-2017.imp +375 -0
  250. data/maps/un-asm-Beng-Latn-1972.imp +188 -0
  251. data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
  252. data/maps/un-ben-Beng-Latn-2016.imp +516 -0
  253. data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
  254. data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
  255. data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
  256. data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
  257. data/maps/un-hin-Deva-Latn-2016.imp +356 -0
  258. data/maps/un-kan-Kana-Latn-2016.imp +214 -0
  259. data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
  260. data/maps/un-mar-Deva-Latn-2016.imp +96 -0
  261. data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
  262. data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
  263. data/maps/un-nep-Deva-Latn-1972.imp +295 -0
  264. data/maps/un-nep-Deva-Latn-2013.imp +62 -0
  265. data/maps/un-ori-Orya-Latn-1972.imp +208 -0
  266. data/maps/un-pan-Guru-Latn-1972.imp +321 -0
  267. data/maps/un-prs-Arab-Latn-1967.imp +214 -0
  268. data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
  269. data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
  270. data/maps/un-tam-Taml-Latn-1972.imp +173 -0
  271. data/maps/un-tel-Telu-Latn-1972.imp +229 -0
  272. data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
  273. data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
  274. data/maps/un-urd-Arab-Latn-1972.imp +290 -0
  275. data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
  276. data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
  277. data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
  278. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
  279. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
  280. data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
  281. data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
  282. data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
  283. data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
  284. data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
  285. data/maps/var-mon-Mong-Latn-1930.imp +101 -0
  286. data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
  287. data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
  288. data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
  289. data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
  290. data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
  291. metadata +335 -0
@@ -0,0 +1,170 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 1979
4
+ language: iso-639-2:kir
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Romanization of Kyrgyz
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/816663/ROMANIZATION_OF_KYRGYZ.pdf
9
+ creation_date: 1979
10
+ confirmation_date: 2019
11
+ description: |
12
+ The BGN/PCGN system for Kyrgyz Cyrillic was designed for use in romanizing names written
13
+ in the Kyrgyz Cyrillic alphabet. The Kyrgyz Cyrillic alphabet contains three characters not present in
14
+ the Russian alphabet: Ң, Ө, and Ү.
15
+
16
+ notes:
17
+ - Both Kyrgyz and Kirghiz may frequently be seen as the language name; both these spellings are used in the
18
+ ISO 639 Standard on the representation of names for languages.
19
+ - The character sequence н г may be romanized n·g in order to differentiate that romanization
20
+ from the digraph ng, which is used to render the character ң.
21
+ - The character ы may be romanized i̵ (Unicode encoding 0069+0335) instead of y, if so desired.
22
+ - 'An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the
23
+ unmodified letters of the basic Roman script is:
24
+ All apostrophes appearing in romanization are U+2019
25
+ Ö (U+00D6) ö (U+00F6)
26
+ Ü (U+00DC) ü (U+00FC)'
27
+ - The Romanization column shows only lowercase forms but, when romanizing, uppercase and
28
+ lowercase Roman letters as appropriate should be used.
29
+ }
30
+
31
+ tests {
32
+ test "Ысык-Көл Облусу", "Ysyk-Köl Oblusu"
33
+ test "Ысык-Көл", "Ysyk-Köl"
34
+ test "Шедвик-Сай", "Shedvik-Say"
35
+ test "Чүй Облусу", "Chüy Oblusu"
36
+ test "Чүй", "Chüy"
37
+ test "Чирик-Сай", "Chirik-Say"
38
+ test "Хребет Джети-Сандал", "Khrebet Djeti-Sandal"
39
+ test "Узук-Булак", "Uzuk-Bulak"
40
+ test "Торугарт Ашуу", "Torugart Ashuu"
41
+ test "Торетал", "Toretal"
42
+ test "Терек", "Terek"
43
+ test "Талды-Булак", "Taldy-Bulak"
44
+ test "Талас Облусу", "Talas Oblusu"
45
+ test "Талас", "Talas"
46
+ test "Сарык-Кёль", "Saryk-Kyol’"
47
+ test "Родник Кара-Суу", "Rodnik Kara-Suu"
48
+ test "Родник Бейрёк-Булак", "Rodnik Beyryok-Bulak"
49
+ test "Перевал Сары-Челек", "Pereval Sary-Chelek"
50
+ test "Перевал Макмал", "Pereval Makmal"
51
+ test "Перевал Кара-Токой", "Pereval Kara-Tokoy"
52
+ test "Перевал Ашуу-Тёр", "Pereval Ashuu-Tyor"
53
+ test "Перевал Ашуу", "Pereval Ashuu"
54
+ test "Ош Шаары", "Osh Shaary"
55
+ test "Ош Облусу", "Osh Oblusu"
56
+ test "Ош", "Osh"
57
+ test "Ош", "Osh"
58
+ test "Осоавиахим", "Osoaviakhim"
59
+ test "Озеро Афлатук", "Ozero Aflatuk"
60
+ test "Нарын Облусу", "Naryn Oblusu"
61
+ test "Нарын", "Naryn"
62
+ test "Метеорологическая Станция Чамкал", "Meteorologicheskaya Stantsiya Chamkal"
63
+ test "Марза-Булак", "Marza-Bulak"
64
+ test "Макмал", "Makmal"
65
+ test "Кыргызстан", "Kyrgyzstan"
66
+ test "Кыргыз Республикасы", "Kyrgyz Respublikasy"
67
+ test "Куру-Сай", "Kuru-Say"
68
+ test "Куру-Сай", "Kuru-Say"
69
+ test "Кур-Пырылды", "Kur-Pyryldy"
70
+ test "Кок-Бель-Таш", "Kok-Bel’-Tash"
71
+ test "Кичи-Сандык", "Kichi-Sandyk"
72
+ test "Кель-Сай", "Kel’-Say"
73
+ test "Карагайлы", "Karagayly"
74
+ test "Кара-Суу", "Kara-Suu"
75
+ test "Жалал-Абад Облусу", "Jalal-Abad Oblusu"
76
+ test "Жалал-Абад", "Jalal-Abad"
77
+ test "Долина Беш-Башат", "Dolina Besh-Bashat"
78
+ test "Гора Арпа-Турча", "Gora Arpa-Turcha"
79
+ test "Бишкек Шаары", "Bishkek Shaary"
80
+ test "Бишкек", "Bishkek"
81
+ test "Бишкек", "Bishkek"
82
+ test "Баткен Облусу", "Batken Oblusu"
83
+ test "Баткен", "Batken"
84
+ test "Аяк-Терек", "Ayak-Terek"
85
+ test "Аюу-Чача", "Ayuu-Chacha"
86
+ test "Арпа", "Arpa"
87
+ test "Ак-Суу", "Ak-Suu"
88
+ }
89
+
90
+ stage {
91
+
92
+ # CHARACTERS
93
+ parallel {
94
+ sub "\u0410", "A" # А
95
+ sub "\u0411", "B" # Б
96
+ sub "\u0412", "V" # В
97
+ sub "\u0413", "G" # Г
98
+ sub "\u0414", "D" # Д
99
+ sub "\u0415", "E" # Е
100
+ sub "\u0401", "Yo" # Ё
101
+ sub "\u0416", "J" # Ж
102
+ sub "\u0417", "Z" # З
103
+ sub "\u0418", "I" # И
104
+ sub "\u0419", "Y" # Й
105
+ sub "\u041A", "K" # К
106
+ sub "\u041B", "L" # Л
107
+ sub "\u041C", "M" # М
108
+ sub "\u041D", "N" # Н
109
+ sub "\u04A2", "Ng" # Ң
110
+ sub "\u041E", "O" # О
111
+ sub "\u04E8", "Ö" # Ө
112
+ sub "\u041F", "P" # П
113
+ sub "\u0420", "R" # Р
114
+ sub "\u0421", "S" # С
115
+ sub "\u0422", "T" # Т
116
+ sub "\u0423", "U" # У
117
+ sub "\u04AE", "Ü" # Ү
118
+ sub "\u0424", "F" # Ф
119
+ sub "\u0425", "Kh" # Х
120
+ sub "\u0426", "Ts" # Ц
121
+ sub "\u0427", "Ch" # Ч
122
+ sub "\u0428", "Sh" # Ш
123
+ sub "\u0429", "Shch" # Щ
124
+ sub "\u042A", "”" # Ъ
125
+ sub "\u042B", "Y" # Ы
126
+ sub "\u042C", "’" # Ь
127
+ sub "\u042D", "E" # Э
128
+ sub "\u042E", "Yu" # Ю
129
+ sub "\u042F", "Ya" # Я
130
+
131
+ sub "\u0430", "a" # а
132
+ sub "\u0431", "b" # б
133
+ sub "\u0432", "v" # в
134
+ sub "\u0433", "g" # г
135
+ sub "\u0434", "d" # д
136
+ sub "\u0435", "e" # е
137
+ sub "\u0451", "yo" # ё
138
+ sub "\u0436", "j" # ж
139
+ sub "\u0437", "z" # з
140
+ sub "\u0438", "i" # и
141
+ sub "\u0439", "y" # й
142
+ sub "\u043A", "k" # к
143
+ sub "\u043B", "l" # л
144
+ sub "\u043C", "m" # м
145
+ sub "\u043D", "n" # н
146
+ sub "\u04A3", "ng" # ң
147
+ sub "\u043E", "o" # о
148
+ sub "\u04E9", "ö" # ө
149
+ sub "\u043F", "p" # п
150
+ sub "\u0440", "r" # р
151
+ sub "\u0441", "s" # с
152
+ sub "\u0442", "t" # т
153
+ sub "\u0443", "u" # у
154
+ sub "\u04AF", "ü" # ү
155
+ sub "\u0444", "f" # ф
156
+ sub "\u0445", "kh" # х
157
+ sub "\u0446", "ts" # ц
158
+ sub "\u0447", "ch" # ч
159
+ sub "\u0448", "sh" # ш
160
+ sub "\u0449", "shch" # щ
161
+ sub "\u044A", "”" # ъ
162
+ sub "\u044B", "y" # ы
163
+ sub "\u044C", "’" # ь
164
+ sub "\u044D", "e" # э
165
+ sub "\u044E", "yu" # ю
166
+ sub "\u044F", "ya" # я
167
+ }
168
+
169
+ }
170
+
@@ -0,0 +1,177 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: kn-1945
4
+ language: iso-639-2:kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: BGN/PCGN 1945 Agreement
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693725/ROMANIZATION_OF_KOREAN-_MR_for_DPRK.pdf
9
+ creation_date: 1945
10
+ adoption_date:
11
+ description:
12
+
13
+ notes: "
14
+
15
+ 1. At the end of a syllable, the character ᄋ should be romanized ng,
16
+ as in the following example:
17
+
18
+ 평양 → P’yŏngyang
19
+
20
+ At the beginning of a syllable, the character ᄋ is silent and
21
+ should not be romanized. An example follows:
22
+
23
+ 용화 → Yonghwa
24
+
25
+ 2. Syllable boundaries within words are not reflected in romanization.
26
+ In the different types of syllables shown in the table below, C
27
+ represents any consonant character, V represents any vowel character
28
+ and / represents a syllable boundary.
29
+
30
+ Han’gŭl 개성 남포 안양
31
+ Syllable boundaries CV/CVC CVC/CV VC/VC
32
+ Romanization Kaesŏng Namp’o Anyang
33
+
34
+ 3. Euphonic changes occurring within a word, including between the
35
+ specific and generic of a geographical name, should be reflected in
36
+ romanization. Generic terms are usually seen separated from the name
37
+ by a hyphen and with a lower case initial letter rather than as a
38
+ separate word:
39
+
40
+ 영진리 → Yŏngjil-li
41
+ 덕흥리 → Tŏkhŭng-ni
42
+ 압록강 → Amnok-kang
43
+ 대동강 → Taedong-gang
44
+
45
+ 4. As a result of 조선말규범집 (‘Standard Korean Language’ guidelines
46
+ published in North Korea in 1966), unlike the Korean spoken in the
47
+ Republic of Korea, the language spoken in the Democratic People’s
48
+ Republic of Korea maintains and pronounces the word-initial ᄅ (‘r’).
49
+ The use of the word-initial ᄅ ('r') can be seen in official news
50
+ reports as well as native mapping. Since such examples exist, the
51
+ word initial ᄅ ('r') is reflected as an option in the tables given above.
52
+
53
+ 5. The Romanization column shows only lowercase forms but, when romanizing,
54
+ uppercase and lowercase Roman letters as appropriate should be used.
55
+ "
56
+ }
57
+
58
+ tests {
59
+ test "평양", "P’yŏngyang"
60
+ test "용화", "Yonghwa"
61
+ test "개성", "Kaesŏng"
62
+ test "남포", "Namp’o"
63
+ test "안양", "Anyang"
64
+ test "영진-리", "Yŏngjil-li"
65
+ test "덕흥-리", "Tŏkhŭng-ni"
66
+ test "압록-강", "Amnok-kang"
67
+ test "대동-강", "Taedong-gang"
68
+ test "라선특별시", "Rasŏnt’ŭkpyŏlsi"
69
+ test "은하-리", "Ŭnha-ri"
70
+ test "은중-리", "Ŭnjung-ni"
71
+ test "은장-령", "Ŭnjang-nyŏng"
72
+ test "은혜-동", "Ŭnhye-dong"
73
+ test "은호-리", "Ŭnho-ri"
74
+ test "은행정", "Ŭnhaengjŏng"
75
+ test "은행-동", "Ŭnhaeng-dong"
76
+ test "은행-촌", "Ŭnhaeng-ch’on"
77
+ test "원수", "Wŏnsu"
78
+ test "원소리-고개", "Wŏnsori-gogae"
79
+ test "원소참", "Wŏnsoch’am"
80
+ test "원소-리", "Wŏnso-ri"
81
+ test "원신-리", "Wŏnsil-li"
82
+ test "난곡", "Nan’gok"
83
+ test "난산-리", "Nansal-li"
84
+ test "난직", "Nanjik"
85
+ test "영곡", "Yŏnggok"
86
+ test "윗두밀", "Wittumil"
87
+ test "윗도심이", "Wittosimi"
88
+ test "둔지", "Tunji"
89
+ test "서승", "Sŏsŭng"
90
+ test "신촌", "Sinch’on"
91
+ test "비암덕", "Piamdŏk"
92
+ test "바위안", "Pawian"
93
+ test "오송평", "Osongp’yŏng"
94
+ test "그물목", "Kŭmulmok"
95
+ test "구원정", "Kuwŏnjŏng"
96
+ test "일하", "Irha"
97
+ test "황우", "Hwangu"
98
+ test "자작보", "Chajakpo"
99
+ test "비파1-동", "Pip’a Il-tong"
100
+ test "문암 오-동", "Munam O-dong"
101
+ }
102
+
103
+ dependency "var-kor-Hang-Latn-mr-1939", as: hanglatn
104
+
105
+
106
+ stage {
107
+
108
+ # RULES
109
+ # Insert a zero-width space (U+200B) at the start of the line and after each space (i.e. at the start of every word),
110
+ # so that the word-initial conversion rules of the inherited map are blocked.
111
+ sub line_start, "\u200B"
112
+ sub "", "\u200B", before: " "
113
+
114
+ # Convert each digit 1-9 to a space followed by the corresponding Hangul numeral
115
+ sub capture(any([any("0".."9") + " "])), ref( 1 ) + " ", after: any("0".."9")
116
+ sub "1", " 일"
117
+ sub "2", " 이"
118
+ sub "3", " 삼"
119
+ sub "4", " 사"
120
+ sub "5", " 오"
121
+ sub "6", " 육"
122
+ sub "7", " 칠"
123
+ sub "8", " 팔"
124
+ sub "9", " 구"
125
+
126
+ # Logic to add a hyphen in front of generic terms (currently disabled; kept commented out below)
127
+ # - pattern: "(?<=.)(구역|동|리|도|고개|골|로동자구|사무소|초등학교|중학교|고등학교|강|포|령|역|봉|사|천|교|제|저수지|소류지|재|못|말|면|암|교회|촌|병원|바위|공원|섬|우체국|대학교|보건소|굴|치|대교|지구|폭포|해수욕장|휴게소|중고교|읍|보건진료소|마을|톨게이트|대학|시장|경찰서|학교)$" #to be expanded
128
+ # result: "-\\1"
129
+
130
+
131
+ run map.hanglatn.stage.main
132
+
133
+
134
+ # POSTRULES
135
+
136
+ # Add space to the two ends of the string for easier word boundary handling
137
+ sub line_start, " "
138
+ sub line_end, " "
139
+
140
+ # Initial rules in the inherited map were blocked, so that
141
+ # this set of updated rules (with the onset rules removed) will be used instead.
142
+ sub "\u200B", ""
143
+
144
+ sub "ᄀ", "k", before: " " # HANGUL CHOSEONG KIYEOK
145
+ sub "ᄂ", "n", before: " " # HANGUL CHOSEONG NIEUN
146
+ sub "ᄃ", "ch", before: " ", after: any("ᅵᅣᅤᅧᅨᅭᅲ") # HANGUL CHOSEONG TIEUT # T -> Ch before yotized vowels
147
+ sub "ᄃ", "t", before: " " # HANGUL CHOSEONG TIEUT
148
+ sub "ᄅ", "r", before: " " # HANGUL CHOSEONG RIEUL
149
+ sub "ᄆ", "m", before: " " # HANGUL CHOSEONG MIEUM
150
+ sub "ᄇ", "p", before: " " # HANGUL CHOSEONG PIEUP
151
+ sub "ᄉ", "sh", before: " ", after: "ᅱ" # HANGUL CHOSEONG SIOS
152
+ sub "ᄉ", "s", before: " " # HANGUL CHOSEONG SIOS
153
+ sub "ᄋ", "", before: " " # HANGUL CHOSEONG IEUNG
154
+ sub "ᄌ", "ch", before: " " # HANGUL CHOSEONG CIEUC
155
+ sub "ᄎ", "ch’", before: " " # HANGUL CHOSEONG CHIEUCH
156
+ sub "ᄏ", "k’", before: " " # HANGUL CHOSEONG KHIEUKH
157
+ sub "ᄐ", "ch’", before: " ", after: any("ᅵᅣᅤᅧᅨᅭᅲ") # HANGUL CHOSEONG THIEUTH + YOTIZED VOWELS
158
+ sub "ᄐ", "t’", before: " " # HANGUL CHOSEONG THIEUTH
159
+ sub "ᄑ", "p’", before: " " # HANGUL CHOSEONG PHIEUPH
160
+ sub "ᄒ", "h", before: " " # HANGUL CHOSEONG HIEUH
161
+ sub "ᄁ", "kk", before: " " # HANGUL CHOSEONG SSANGKIYEOK
162
+ sub "ᄭ", "kk", before: " " # HANGUL CHOSEONG SIOS-KIYEOK
163
+ sub "ᄄ", "tt", before: " " # HANGUL CHOSEONG SSANGTIEUT
164
+ sub "ᄯ", "tt", before: " " # HANGUL CHOSEONG SIOS-TIEUT
165
+ sub "ᄈ", "pp", before: " " # HANGUL CHOSEONG SSANGPIEUP
166
+ sub "ᄲ", "pp", before: " " # HANGUL CHOSEONG SIOS-PIEUP
167
+ sub "ᄊ", "ss", before: " " # HANGUL CHOSEONG SSANGSIOS
168
+ sub "ᄍ", "tch", before: " " # HANGUL CHOSEONG SSANGCIEUC
169
+ sub "ᄶ", "tch", before: " " # HANGUL CHOSEONG SIOS-CIEUC
170
+
171
+ # Remove space added
172
+ sub line_start + " ", ""
173
+ sub " " + line_end, ""
174
+
175
+ title_case
176
+ compose
177
+ }
@@ -0,0 +1,40 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: iso-639-2:kor
5
+ source_script: Hang
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url:
9
+ creation_date:
10
+ adoption_date:
11
+ description:
12
+
13
+ notes:
14
+ BGN/PCGN 2011 Agreement
15
+ }
16
+
17
+ tests {
18
+ test "불국사", "Bulguksa"
19
+ test "묵호", "Mukho"
20
+ test "울산", "Ulsan"
21
+ test "독립문", "Dongnimmun"
22
+ test "강남역", "Gangnamyeok"
23
+ test "남산리", "Namsan-ri" #Note: no assimilation for -ri even after nasals
24
+ test "내월리", "Naewol-ri"
25
+ test "울릉군", "Ulleung-gun"
26
+ test "설악산", "Seoraksan"
27
+ test "삼죽면", "Samjuk-myeon"
28
+ test "평리1동", "Pyeongni Il-dong"
29
+ test "평리2동", "Pyeongni I-dong"
30
+ test "탑안이", "Tabani"
31
+ }
32
+
33
+ dependency "moct-kor-Hang-Latn-2000", as: hanglatn
34
+
35
+ stage {
36
+
37
+ run map.hanglatn.stage.main
38
+
39
+ title_case
40
+ }
@@ -0,0 +1,41 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2011
4
+ language: iso-639-2:kor
5
+ source_script: Kore
6
+ destination_script: Latn
7
+ name: Ministry of Culture and Tourism System (2000) BGN/PCGN 2011 Agreement
8
+ url: ""
9
+ creation_date: ""
10
+ adoption_date: ""
11
+ description: |
12
+ Establishes a system for the transliteration of the characters of Korean script into Latin characters.
13
+ Intended to provide a means for international communication of written documents.
14
+
15
+ notes:
16
+ - BGN/PCGN 2011 Agreement
17
+ }
18
+
19
+ tests {
20
+ test "佛國寺", "Bulguksa"
21
+ test "묵호", "Mukho"
22
+ test "蔚山", "Ulsan"
23
+ test "獨立門", "Dongnimmun"
24
+ test "江南驛", "Gangnamyeok"
25
+ test "南山里", "Namsan-ri" #Note: no assimilation for -ri even after nasals
26
+ test "내월里", "Naewol-ri"
27
+ test "鬱陵郡", "Ulleung-gun"
28
+ test "雪嶽山", "Seoraksan"
29
+ test "三竹面", "Samjuk-myeon"
30
+ test "坪里1洞", "Pyeongni Il-dong"
31
+ test "坪里2洞", "Pyeongni I-dong"
32
+ test "탑안이", "Tabani"
33
+ }
34
+
35
+ dependency "var-kor-Kore-Hang-2013", as: korehang
36
+ dependency "moct-kor-Hang-Latn-2000", as: hanglatn
37
+
38
+ stage {
39
+ run map.korehang.stage.main
40
+ run map.hanglatn.stage.main
41
+ }
@@ -0,0 +1,240 @@
1
+ metadata {
2
+ authority_id: bgnpcgn
3
+ id: 2007
4
+ language: kur
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ROMANIZATION OF KURDISH -- BGN/PCGN 2007
8
+ url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/693727/ROMANIZATION_OF_KURDISH.pdf
9
+ creation_date: 2007
10
+ confirmation_date: 2017-12
11
+ description: |
12
+ The tabulation below is applicable to the Kurdish language as a
13
+ whole. It is based for the most part on the Hawar Roman alphabet used
14
+ in the Library of Congress Standard Kurdish Orthography Table, but it
15
+ also incorporates certain non-Hawar elements found in A Kurdish-English
16
+ Dictionary (Taufiq Wahby & C J Edmonds, OUP, 1966). The tabulation
17
+ covers both major varieties of the Kurdish language: Kurmanji and
18
+ Sorani. Kurmanji is spoken principally in Turkey and in Iraq north of
19
+ the Great Zab River (Dahūk/Dihok Governorate). It is generally written
20
+ in Roman script, and usually employs the Roman orthography. Sorani is
21
+ spoken principally in Iraq south of the Great Zab river (Arbīl/Hewlêr
22
+ and As Sulaymānīyah/Slêmanî governorates). It is generally written in
23
+ Perso-Arabic script, and usually employs the Perso-Arabic script
24
+ orthography.
25
+
26
+ Kurdish forms of geographical names in Turkey will usually be found
27
+ in Roman script, and so no romanization process will be required. The
28
+ digraph options for consonant letters '\u0686', '\u0634', and '\u063A'
29
+ will not be encountered for such names. In Iraq, Syria, and Iran,
30
+ Kurdish will usually be encountered in Perso-Arabic script, in which
31
+ case it should be romanized into the corresponding Roman script form.
32
+ Kurdish geographical names for places and features outside Turkey,
33
+ found in Roman script form, should, where necessary and if possible, be
34
+ tailored to fit the orthography of the Romanization shown below and
35
+ should employ the digraph options for consonant letters '\u0686',
36
+ '\u0634', and '\u063A'.
37
+
38
+ notes:
39
+
40
+ - In pure Kurdish words hamza is borne by yā’ ( ئ ) and occurs only
41
+ before initial vowels; it is not romanized. Medial and final hamza in
42
+ Arabic borrowings are romanized by ’ (apostrophe – Unicode encoding
43
+ 2019).
44
+
45
+ - The letters ث ذ ص ض ط ظ do not occur in pure Kurdish words. In Arabic
46
+ borrowings some writers retain these letters, others substitute س ز س ز
47
+ ت ز respectively. Only the letters ط ض and ص are catered for in the
48
+ Library of Congress tabulation, as reflected in lines 16-18 of the
49
+ above Consonant table. Words of obvious Arabic origin occurring in a
50
+ Kurdish toponymic environment will be treated as Kurdish rather than
51
+ Arabic, as will words of other non-Kurdish origins.
52
+
53
+ - The digraph options appearing in rows 6, 15 and 20 of the consonants
54
+ table should be used for Kurdish geographical names in Iraq, Iran, and
55
+ Syria. The single character options should be used for Kurdish
56
+ geographical names in Turkey.
57
+
58
+ - ڨ is used to represent v in foreign words. Some southern Kurdish
59
+ writers use it to represent the v in borrowings from northern Kurdish
60
+ dialects. و is pronounced as a v in the north and as a w elsewhere.
61
+
62
+ - Hā’ can be used as a vowel or a consonant. The initial (ه) and medial
63
+ (ﻬ) forms are used for the consonant h, Consonant table, row 31, while the
64
+ final (ه) and independent (ﻩ) forms are used to represent the vowel e,
65
+ Vowel table, row 1. Therefore, when used as a consonant, the final and
66
+ independent forms of hā’ will be seen as ‘ه’ instead of ‘ﻪ’ and ‘ه’,
67
+ respectively. For example, مهه meh, (“month”). When used as ‘e’, the
68
+ hā’ behaves like the letters alif (ا), wāw (و), dāl (د), and rā (ر), in
69
+ that it never joins to the following letter (i.e., it has no medial
70
+ form). Consequently, the following letter will display the initial
71
+ form, e.g. هەولێر Hewlêr (unless there is only one following letter, in
72
+ which case it will be written in the independent form, e.g. ماوەت
73
+ Mawet). As with other vowels (see special rules 2 and 3), initial e is
74
+ preceded by the kursî hamza, yielding initial ئه , e.g. ئهني enî
75
+ “forehead”.
76
+
77
+ - In pure Kurdish words, the vowel ى is always long î, e.g. كانى ماسێ
78
+ Kanî Masê. When it represents îzafe, it is also romanized î and joined
79
+ by means of a hyphen to its preceding word e.g. پارێزگاى دهۆك Parêzga-î
80
+ Dihok.
81
+
82
+ - |
83
+ An inventory of letter-diacritic combinations, used in addition to
84
+ the unmodified letters of the basic Roman script in the Romanization of
85
+ Kurdish, with their Unicode encoding, is:
86
+
87
+ '‘': '2018' , '’': '2019'
88
+ 'Ç': '00C7' , 'ç': '00E7'
89
+ 'Ḍ': '1E0C' , 'ḍ': '1E0D'
90
+ 'Ê': '00CA' , 'ê': '00EA'
91
+
92
+ # There is no single Unicode encoding for these letter-diacritic combinations.
93
+ 'Ḧ': '0048+0308' , 'ḧ': '0068+0308'
94
+ 'Î': '00CE' , 'î': '00EE'
95
+ 'Ł': '0141' , 'ł': '0142'
96
+ 'Ö': '00D6' , 'ö': '00F6'
97
+ 'Ṟ': '1E5E' , 'ṟ': '1E5F'
98
+ 'Ş': '015E' , 'ş': '015F'
99
+ 'Ṣ': '1E62' , 'ṣ': '1E63'
100
+ 'Ṭ': '1E6C' , 'ṭ': '1E6D'
101
+ 'Û': '00DB' , 'û': '00FB'
102
+ 'Ü': '00DC' , 'ü': '00FC'
103
+ 'Ẍ': '1E8C' , 'ẍ': '1E8D'
104
+
105
+ - The Romanization column shows only lowercase forms but, when
106
+ romanizing, uppercase and lowercase Roman letters as appropriate should
107
+ be used.
108
+
109
+ # Special Rules
110
+ - The conjunction و (and) should be rendered u if the
111
+ preceding word ends in a consonant, and w if the preceding
112
+ word ends in a vowel. It should be separated by spaces from
113
+ the preceding and following words.
114
+
115
+ - In the Perso-Arabic orthography for Kurdish, all vowels are
116
+ written, with the exception of the short i, which is
117
+ expressed with a kasrah under the preceding consonant (ِ).
118
+ In Perso-Arabic script, the kasrah will rarely be written (
119
+ e.g., كرن kirin “to do”). Like all Kurdish vowels, the
120
+ short i will be preceded by a kursî hamza (ئ) if it appears
121
+ at the beginning of a word (see 3 below; see row 4 of vowel
122
+ table).
123
+
124
+ - In the Perso-Arabic orthography for Kurdish, when a vowel
125
+ comes at the beginning of a word, or when a vowel directly
126
+ follows another vowel, a kursî hamza (ئ) precedes it (e.g.,
127
+ ئاگر agir “fire”).
128
+
129
+ - A Kurdish word will never start with alif (ا). A Kurdish
130
+ word may begin with a yā’ (ي) or wāw (و), but only when
131
+ they are used as a consonant, when they will be romanized
132
+ as y and w, respectively.
133
+
134
+ - When preceded by a consonant, yā’ (ي) and wāw (و) should be
135
+ romanized î and u, respectively. When preceded by a vowel (
136
+ including short i, which is not written), yā’ (ي) and wāw
137
+ (و) should be romanized y and w, respectively.
138
+
139
+ - The Arabic sign shaddah ( ّ ) denoting a doubled consonant
140
+ is not used in Kurdish; doubled consonants, which are rare,
141
+ are written twice e.g. موحەممەد Muḧemmed; موسەننا Musanna.
142
+ Shaddah might be used in Arabic borrowings but, as in
143
+ unpointed Arabic, would generally be omitted.
144
+
145
+ - Particles such as له le (= at, in, on) and به be (= to,
146
+ for, by, with) should be written separately from their
147
+ following word, e.g. له كوردستانێ Le Kurdistanê “in
148
+ Kurdistan”.
149
+
150
+ - Occasionally the character sequences چه, سه and گه occur.
151
+ They may be romanized c·h, s·h, and g·h in order to
152
+ differentiate those romanizations from the digraphs ch, sh,
153
+ and gh.
154
+ }
155
+
156
+ tests {
157
+ test "كاني ماسێ", "Kanî Masê"
158
+ test "كِرِن", "Kirin"
159
+ test "ئاگِر", "Agir"
160
+ test "موحەممەد", "Muḧemmed"
161
+ # Disabled test (issue 604) — source: موسەننا
162
+ # expected: Musanna
163
+ test "لەكوردِستانێ", "Le Kurdistanê"
164
+ }
165
+
166
+ stage {
167
+
168
+ # CHARACTERS
169
+ parallel {
170
+
171
+ sub "\u0650", "i" # ِ kasra special rule 2
172
+ sub "\u0644\u06d5", "le " # special rule 7
173
+ sub "\u0628\u06d5", "be " # special rule 7
174
+ # Note 1
175
+ sub "\u0621", "’" # ء
176
+ sub "\u0624", "’" # ؤ
177
+ sub "\u0626", "’" # ئ
178
+
179
+ sub "و", "u", before: any("ء|ب|پ|ت|ج|چ|ح|خ|د|ر|ڕ|ز|ژ|س|ش|ص|ض|ط|ع|غ|́|ڨ|ق|ک|ك|گ|ل|ڵ|م|ن|و|ه|ي") # special note 4/5
180
+ sub "ي", "î", before: any("ء|ب|پ|ت|ج|چ|ح|خ|د|ر|ڕ|ز|ژ|س|ش|ص|ض|ط|ع|غ|́|ڨ|ق|ک|ك|گ|ل|ڵ|م|ن|و|ه|ي") # special note 4/5
181
+ sub "\u0621", "’" # ء (see note 1 and 7)
182
+ sub "\u0628", "b" # ب
183
+ sub "\u067E", "p" # پ
184
+ sub "\u062A", "t" # ت (see note 2)
185
+ sub "\u062C", "c" # ج
186
+ sub "\u0686", any(["ch", "ç"]) # چ (see notes 3 and 7)
187
+ sub "\u062D", "ḧ" # ح
188
+ sub "\u062E", "x" # خ
189
+ sub "\u062F", "d" # د
190
+ sub "\u0631", "r" # ر
191
+ sub "\u0695", "ṟ" # ڕ (Formerly written ڒ ڔ or رر according to typeface available; may vary on older sources. See note 7.)
192
+ sub "\u0632", "z" # ز (see note 2)
193
+ sub "\u0698", "j" # ژ
194
+ sub "\u0633", "s" # س (see note 2)
195
+ sub "\u0634", any(["sh", "ş"]) # ش (see notes 3 and 7)
196
+ sub "\u0635", "ṣ" # ص (see notes 2 and 7)
197
+ sub "\u0636", "ḍ" # ض (see notes 2 and 7)
198
+ sub "\u0637", "ṭ" # ط (see notes 2 and 7)
199
+ sub "\u0639", "‘" # ع (see note 7)
200
+ sub "\u063A", any(["gh", "ẍ"]) # غ (see notes 3 and 7)
201
+ sub "\u0341", "f" # ف
202
+ sub "\u06A8", "v" # ڨ (see note 4)
203
+ sub "\u0642", "q" # ق
204
+ sub "\u06A9", "k" # ك
205
+ sub "\u0643", "k" # ك
206
+ sub "\u06AF", "g" # گ
207
+ sub "\u0644", "l" # ل
208
+ sub "\u06B5", "ł" # ڵ (Formerly written ڶ according to type available; may vary on older sources. See note 7)
209
+ sub "\u0645", "m" # م
210
+ sub "\u0646", "n" # ن
211
+ sub "\u0648", "w" # و (see note 4)
212
+ sub "\u0647", "h" # ه (see note 5)
213
+ sub "\u064A", "y" # ي
214
+
215
+ # VOWELS
216
+ sub "\u0647" + boundary, "e" # See notes 1 and 5
217
+ sub "\u06D5", "e" # See notes 1 and 5
218
+ sub "\u0626\u06D5", "e" # See notes 1 and 5
219
+ sub "\u0627", "a" # See note 1
220
+ sub "\u0626\u0627", "a" # See note 1
221
+ sub "\u064A", "î" # See notes 1, 6 and 7
222
+ sub "\u0626\u064A", "î" # See notes 1, 6 and 7
223
+ sub "\u0626", "i"
224
+ sub "\u06CE", "ê" # See note 7
225
+ sub "\u0626\u06CE", "ê" # See note 7
226
+ sub "\u0648", "u"
227
+ sub "\u0626\u0648", "u"
228
+ sub "\u0648\u0648", "û" # See note 7
229
+ sub "\u0626\u0648\u0648", "û" # See note 7
230
+ sub "\u06C6", "o"
231
+ sub "\u0626\u06C6", "o"
232
+ sub "\u0648", "ö" # Rare; previously written وي . See note 7
233
+ sub "\u06CA", "ü" # Only appearing in some dialects and only in old sources. Often equated to /û/ (row 7 above). Sometimes written يو See note 7.
234
+ }
235
+
236
+ # POSTRULES
237
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
238
+
239
+
240
+ }