interscript-maps 2.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (291) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +28 -0
  3. data/interscript-maps.gemspec +28 -0
  4. data/interscript-maps.yaml +235 -0
  5. data/libs/posix.iml +11 -0
  6. data/libs/unicode.iml +13 -0
  7. data/libs/var-Cyrl.iml +7 -0
  8. data/libs/var-kor.iml +17 -0
  9. data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
  10. data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
  11. data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
  12. data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
  13. data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
  14. data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
  21. data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
  22. data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
  23. data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
  24. data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
  27. data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
  28. data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
  29. data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
  30. data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
  31. data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
  32. data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
  33. data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
  34. data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
  35. data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
  36. data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
  37. data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
  38. data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
  39. data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
  40. data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
  41. data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
  42. data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
  43. data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
  44. data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
  45. data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
  46. data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
  47. data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
  48. data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
  49. data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
  50. data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
  51. data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
  52. data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
  53. data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
  54. data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
  55. data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
  56. data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
  57. data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
  58. data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
  59. data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
  60. data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
  61. data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
  62. data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
  63. data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
  64. data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
  65. data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
  66. data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
  67. data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
  68. data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
  69. data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
  70. data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
  71. data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
  72. data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
  73. data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
  74. data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
  75. data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
  76. data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
  77. data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
  78. data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
  79. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
  80. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
  81. data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
  82. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
  83. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
  84. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
  85. data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
  86. data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
  87. data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
  88. data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
  89. data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
  90. data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
  91. data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
  92. data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
  93. data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
  94. data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
  95. data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
  96. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
  97. data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
  98. data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
  99. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
  100. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
  101. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
  102. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
  103. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
  104. data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
  105. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
  106. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
  107. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
  108. data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
  109. data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
  110. data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
  111. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
  112. data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
  113. data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
  114. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
  115. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
  116. data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
  117. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
  118. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
  119. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
  120. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
  122. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
  123. data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
  124. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
  125. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
  126. data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
  127. data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
  128. data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
  129. data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
  130. data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
  131. data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
  132. data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
  133. data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
  134. data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
  135. data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
  136. data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
  137. data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
  138. data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
  139. data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
  140. data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
  141. data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
  142. data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
  143. data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
  144. data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
  145. data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
  146. data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
  147. data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
  148. data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
  149. data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
  150. data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
  151. data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
  152. data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
  153. data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
  154. data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
  155. data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
  156. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
  157. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
  158. data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
  159. data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
  160. data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
  161. data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
  162. data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
  163. data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
  164. data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
  165. data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
  166. data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
  167. data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
  168. data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
  169. data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
  170. data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
  171. data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
  172. data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
  173. data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
  174. data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
  175. data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
  176. data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
  177. data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
  178. data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
  179. data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
  180. data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
  181. data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
  182. data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
  183. data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
  184. data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
  185. data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
  186. data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
  187. data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
  188. data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
  189. data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
  190. data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
  191. data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
  192. data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
  193. data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
  194. data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
  195. data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
  196. data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
  197. data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
  198. data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
  199. data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
  200. data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
  201. data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
  202. data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
  203. data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
  204. data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
  205. data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
  206. data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
  207. data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
  208. data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
  209. data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
  210. data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
  211. data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
  212. data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
  213. data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
  214. data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
  215. data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
  216. data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
  217. data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
  218. data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
  219. data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
  220. data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
  221. data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
  222. data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
  223. data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
  224. data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
  225. data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
  226. data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
  227. data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
  228. data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
  229. data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
  230. data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
  231. data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
  232. data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
  233. data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
  234. data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
  235. data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
  236. data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
  237. data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
  238. data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
  239. data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
  240. data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
  241. data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
  242. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
  243. data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
  244. data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
  245. data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
  246. data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
  247. data/maps/un-ara-Arab-Latn-1971.imp +137 -0
  248. data/maps/un-ara-Arab-Latn-1972.imp +155 -0
  249. data/maps/un-ara-Arab-Latn-2017.imp +375 -0
  250. data/maps/un-asm-Beng-Latn-1972.imp +188 -0
  251. data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
  252. data/maps/un-ben-Beng-Latn-2016.imp +516 -0
  253. data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
  254. data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
  255. data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
  256. data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
  257. data/maps/un-hin-Deva-Latn-2016.imp +356 -0
  258. data/maps/un-kan-Kana-Latn-2016.imp +214 -0
  259. data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
  260. data/maps/un-mar-Deva-Latn-2016.imp +96 -0
  261. data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
  262. data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
  263. data/maps/un-nep-Deva-Latn-1972.imp +295 -0
  264. data/maps/un-nep-Deva-Latn-2013.imp +62 -0
  265. data/maps/un-ori-Orya-Latn-1972.imp +208 -0
  266. data/maps/un-pan-Guru-Latn-1972.imp +321 -0
  267. data/maps/un-prs-Arab-Latn-1967.imp +214 -0
  268. data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
  269. data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
  270. data/maps/un-tam-Taml-Latn-1972.imp +173 -0
  271. data/maps/un-tel-Telu-Latn-1972.imp +229 -0
  272. data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
  273. data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
  274. data/maps/un-urd-Arab-Latn-1972.imp +290 -0
  275. data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
  276. data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
  277. data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
  278. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
  279. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
  280. data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
  281. data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
  282. data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
  283. data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
  284. data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
  285. data/maps/var-mon-Mong-Latn-1930.imp +101 -0
  286. data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
  287. data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
  288. data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
  289. data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
  290. data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
  291. metadata +335 -0
@@ -0,0 +1,40 @@
1
+ metadata {
2
+ authority_id: mvd
3
+ id: 2010
4
+ language: iso-639-2:bel
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: |
8
+ 8/22721 On approval of the Instructions on the organization of work of units of citizenship
9
+ and migration of internal affairs bodies on the issuance, registration, exchange,
10
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
11
+ url: https://pravo.by/document/?guid=3871&p0=W21022721
12
+ creation_date: 2010
13
+
14
+ description: |
15
+ RESOLUTION OF THE MINISTRY OF INTERNAL AFFAIRS OF THE REPUBLIC OF BELARUS
16
+ June 28, 2010 No. 200
17
+ On approval of the Instructions on the organization of work of units of citizenship
18
+ and migration of internal affairs bodies on the issuance, registration, exchange,
19
+ invalidation, seizure, storage and destruction of a passport of a citizen of the Republic of Belarus
20
+
21
+ notes:
22
+ - check notes from mvd-rus-Cyrl-Latn-2008
23
+ }
24
+
25
+ tests {
26
+ test "Ева", "Eva"
27
+ test "Васiльева", "Vasileva"
28
+ test "Адъютантов", "Adjutantov"
29
+ }
30
+
31
+ dependency "mvd-rus-Cyrl-Latn-2008", as: cyrllatn
32
+
33
+ stage {
34
+ run map.cyrllatn.stage.translit # reuse the translit stage of the map imported above as `cyrllatn` (mvd-rus-Cyrl-Latn-2008)
35
+
36
+ # POSTRULES
37
+ sub "\u0301", "" # strip U+0301 (combining acute accent); this is the only mark removed here
38
+
39
+ compose # NOTE(review): presumably recomposes the output into precomposed Unicode form - confirm against the Interscript DSL docs
40
+ }
@@ -0,0 +1,106 @@
1
+ metadata {
2
+ authority_id: odni
3
+ id: 2004
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Arabic Personal Names Office of the Director Of National Intelligence 2004 System
8
+ url: https://github.com/interscript/interscript-private-references/blob/master/odni/Arabic_IC_Standard.doc
9
+ creation_date: 2004
10
+ confirmation_date: 2004-06
11
+ description:
12
+ notes:
13
+ - Long/Short Vowels Long and short vowels are not
14
+ distinguished in this system Samir (could be Saamir or
15
+ Samiir in Arabic).
16
+
17
+ - Double consonants Double consonants represented by the
18
+ Arabic shaddah are shown in most cases (e.g., Hassan,
19
+ Muhammad). Exceptions ’ayn and consonants represented by
20
+ digraphs are not doubled (e.g., al-Qadhafi [not
21
+ alQadhdhafi], Mubashir [not Mubashshir]).
22
+
23
+ - Hamzah (glottal stop) The hamzah is represented by an
24
+ apostrophe (’). Note that this is the same symbol used to
25
+ represent another consonant, the ’ayn.
26
+
27
+ - Ta’ marbutah (feminine ending marker) On the construct
28
+ form or when pronounced “t”, it is represented with a roman
29
+ t. In all other cases, it is represented with an h.
30
+
31
+ - Digraphs No distinction is made between digraphs such as
32
+ sh and single contiguous letters (e.g., s followed by h).
33
+
34
+ - Definite article “al” (‘the’) Follows Arabic spelling
35
+ rather than pronunciation. That is, sun letter assimilation
36
+ is not shown in the Romanized form (e.g., ’Abd-alRahman,
37
+ not ’Abd-ar-Rahman).
38
+
39
+ - Diphthongs the second element of the diphthong is
40
+ represented by a y or a w (rather than an i or a u)
41
+ Haytham, Faysal, Tawfiq, Rawdah.
42
+
43
+ - Hyphens Hyphens (-) are used to connect name elements
44
+ within a name ’Abd- al Rahman, Abu-al-Bashar, Bin-Ladin.
45
+ Exceptions Names that incorporate “Allah” as part of the
46
+ name (e.g., ’Abdallah, Nasrallah), names marked by the
47
+ lineage/family marker “Al” (e.g., Al Thani) are not
48
+ hyphenated.
49
+
50
+ - The definite article, “al”, within name phrases, is
51
+ Romanized as al and not as ul Nur-al-Din (not Nur-ul-Din).
52
+ It is not capitalized when name-initial.
53
+
54
+ - Names that incorporate Allah as part of the name retain the
55
+ a of Allah rather than a grammatical marker u ’Abdallah (
56
+ not ’Abdullah).
57
+
58
+ - Foreign names borrowed or appearing in Arabic are spelled
59
+ according to the standard Western tradition Georges,
60
+ Michel. However, names of non-Arabic origin no longer
61
+ considered foreign by Arabic speakers follow the IC
62
+ conventions Butrus (not Peter).
63
+
64
+ - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
65
+ with an alif, in which case it is Romanized as Ibn. The
66
+ colloquial form Bu (‘father’) should not be standardized as
67
+ Abu. These prefixes are capitalized.
68
+
69
+ - In general, Romanization follows the Modern Standard
70
+ Arabic (MSA) form rather than local pronunciation
71
+ standards. For example, the letter ‫ج‬ (jim) is represented
72
+ as a j even when pronounced as a “g” (e.g., Egyptian Gamal
73
+ is Romanized as Jamal).
74
+ }
75
+
76
+ tests {
77
+ test "مِصر", "Miṣr"
78
+ test "قَطَر", "Qaṭar"
79
+ test "المَغرِب", "Al Maghrib"
80
+ test "الجُمهُورِيَّة العِراقِيَّة", "Al Jumhuriyah al ’Iraqiyah"
81
+ test "جُمهُورِيَّة العِراق", "Jumhuriyat al ’Iraq"
82
+ test "جُمهُورِيَّة مِصر العَرَبِيَّة", "Jumhuriyat Miṣr al ’Arabiyah"
83
+ test "بَغداد", "Baghdad"
84
+ test "تُونِس", "Tunis"
85
+ test "حَسّان", "Hassan"
86
+ test "مُحَمَّد", "Muhammad"
87
+ test "القَذَّافِي", "Al Qadhafi"
88
+ test "مُبَشِّر", "Mubashir"
89
+ test "الجَزائِر", "Al Jaza’ir"
90
+ test "عَبدالرَحمَن", "’Abd al Rahman"
91
+ test "هَيْثَم", "Haytham"
92
+ test "فَيْصَل", "Fayṣal"
93
+ test "تَوْفِيق", "Tawfiq"
94
+ test "رَوْضَة", "Rawḍah"
95
+ test "نُورُالدِين", "Nur al Din"
96
+ test "عَبدُاللَّه", "’Abdallah"
97
+ }
98
+
99
+ dependency "odni-ara-Arab-Latn-2015", as: arablatn
100
+
101
+ stage {
102
+
103
+ run map.arablatn.stage.main # this map adds no rules of its own: it delegates to the main stage of the map imported as `arablatn` (odni-ara-Arab-Latn-2015)
104
+
105
+ }
106
+
@@ -0,0 +1,281 @@
1
+ metadata {
2
+ authority_id: odni
3
+ id: 2017
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: Office of the Director Of National Intelligence Arabic Personal Names 2015 System
8
+ url: https://github.com/interscript/ics-630-01/blob/master/reference-docs/ANNEX%20A%20-%20Arabic_Personal_Names_FLTS%20(U).pdf
9
+ creation_date: 2017
10
+ confirmation_date: 2018-06
11
+ description: |
12
+ This system, adapted from the Board on Geographic Names, is
13
+ the Intelligence Community (IC) standard for the
14
+ transliteration of Arabic names that will be applied to all
15
+ final written reports and products for IC consumers. It is
16
+ not intended to eliminate variations of a name that can
17
+ contribute forensic information. Rather, it is to provide
18
+ an IC standard Romanized (English) transliteration from
19
+ modern standard Arabic that can then be linked to forensic
20
+ information in ways that will help identify the referent of
21
+ the name. Ambiguities can result from the Romanization of
22
+ Arabic names because the Arabic source generally omits
23
+ short vowel markings, double consonant marks, and other
24
+ diacritics that would clearly distinguish the name.
25
+ Linguists use their experience with the language and aids
26
+ such as on-line tools and name dictionaries to determine
27
+ the exact Arabic and the appropriate transliteration into
28
+ the Roman alphabet. In cases where an individual's name has
29
+ already been transliterated, that is to be indicated -- as
30
+ found -- in parentheses immediately following its rendition
31
+ in the transliteration standard (e.g., Muhammad Khulud (
32
+ Mohamed Khulood)). In addition, if the original Arabic-
33
+ script spelling is known, that spelling should also appear
34
+ in parentheses following the name, if possible, following
35
+ best practices of the issuing organization and taking into
36
+ consideration information system capabilities. This
37
+ convention is designed to ensure that vital forensic
38
+ information is not lost. For names of persons who are known
39
+ to not be part of the Arabic-speaking community, use the
40
+ relevant IC transliteration standard for names from that
41
+ language (e.g., Mikhail, Yitzhak). A translator’s note may
42
+ be used to clarify the known origin of the person. Spell
43
+ names of individuals from languages that are written in
44
+ Roman letters as they are spelled in those languages (e.g.,
45
+ George Clooney, Jorge Garcia, Georges Pompidou). In the
46
+ case of active senior government officials in the on-line
47
+ CIA World Factbook and the online directory of Chiefs of
48
+ State and Cabinet Members of Foreign Governments, the
49
+ spellings given in these on-line reference works should be
50
+ used in place of the IC Standard. For any individual who
51
+ has at one time been listed in the Factbook or Chiefs of
52
+ State directory but who no longer appears in those
53
+ resources (i.e. is no longer a government official), the IC
54
+ Standard spelling should appear first, with the spelling,
55
+ if known, as it previously appeared in those resources
56
+ listed within parentheses at the first usage. The primary
57
+ goal of this system is to produce a consistent Romanized
58
+ transcription of the name that is readable to the non-
59
+ specialist. The system uses the 26 letters of the standard (
60
+ English) Roman alphabet plus the apostrophe. Some
61
+ ambiguities in the Romanized form will occur without the
62
+ use of diacritics. However, within the context of a report,
63
+ where additional information about the individual is
64
+ provided, the referent will be clearly identified. This
65
+ system will be used in conjunction with on-line tools, name
66
+ dictionaries, and lists containing conventional spellings
67
+ of names of well-known individuals.
68
+ notes: |
69
+ - Long/Short Vowels: Long and short vowels are not
70
+ distinguished in this system Samir (could be Saamir or
71
+ Samiir in Arabic).
72
+
73
+ - Double consonants: Double consonants represented by the
74
+ Arabic shaddah are shown in most cases (e.g., Hassan,
75
+ Muhammad). Exceptions: ’ayn and consonants represented by
76
+ digraphs are not doubled (e.g., al-Qadhafi [not
77
+ alQadhdhafi], Mubashir [not Mubashshir]).
78
+
79
+ - Hamzah (glottal stop): The hamzah is represented by an
80
+ apostrophe (’). Note that this is the same symbol used to
81
+ represent another consonant, the ’ayn.
82
+
83
+ - Ta’ marbutah (feminine ending marker): On the construct
84
+ form or when pronounced “t”, it is represented with a roman
85
+ t. In all other cases, it is represented with an h.
86
+
87
+ - Digraphs: No distinction is made between digraphs such as
88
+ sh and single contiguous letters (e.g., s followed by h).
89
+
90
+ - Definite article “al” (‘the’): Follows Arabic spelling
91
+ rather than pronunciation. That is, sun letter assimilation
92
+ is not shown in the Romanized form (e.g., ’Abd-alRahman,
93
+ not ’Abd-ar-Rahman).
94
+
95
+ - Diphthongs: the second element of the diphthong is
96
+ represented by a y or a w (rather than an i or a u):
97
+ Haytham, Faysal, Tawfiq, Rawdah.
98
+
99
+ - Hyphens: Hyphens (-) are used to connect name elements
100
+ within a name: ’Abd- al Rahman, Abu-al-Bashar, Bin-Ladin.
101
+ Exceptions: Names that incorporate “Allah” as part of the
102
+ name (e.g., ’Abdallah, Nasrallah), names marked by the
103
+ lineage/family marker “Al” (e.g., Al Thani) are not
104
+ hyphenated.
105
+
106
+ - The definite article, “al”, within name phrases, is
107
+ Romanized as al and not as ul: Nur-al-Din (not Nur-ul-Din).
108
+ It is not capitalized when name-initial.
109
+
110
+ - Names that incorporate Allah as part of the name retain the
111
+ a of Allah rather than a grammatical marker u: ’Abdallah (
112
+ not ’Abdullah).
113
+
114
+ - Foreign names borrowed or appearing in Arabic are spelled
115
+ according to the standard Western tradition: Georges,
116
+ Michel. However, names of non-Arabic origin no longer
117
+ considered foreign by Arabic speakers follow the IC
118
+ conventions: Butrus (not Peter).
119
+
120
+ - Prefix ‫بن‬ (bin ‘son of’) is Romanized Bin unless written
121
+ with an alif, in which case it is Romanized as Ibn. The
122
+ colloquial form Bu (‘father’) should not be standardized as
123
+ Abu. These prefixes are capitalized.
124
+
125
+ - In general, Romanization follows the Modern Standard
126
+ Arabic (MSA) form rather than local pronunciation
127
+ standards. For example, the letter ‫ج‬ (jim) is represented
128
+ as a j even when pronounced as a “g” (e.g., Egyptian Gamal
129
+ is Romanized as Jamal).
130
+ }
131
+
132
+ tests {
133
+ test "مِصر", "Miṣr"
134
+ test "قَطَر", "Qaṭar"
135
+ test "المَغرِب", "Al Maghrib"
136
+ test "الجُمهُورِيَّة العِراقِيَّة", "Al Jumhuriyah al ’Iraqiyah"
137
+ test "جُمهُورِيَّة العِراق", "Jumhuriyat al ’Iraq"
138
+ test "جُمهُورِيَّة مِصر العَرَبِيَّة", "Jumhuriyat Miṣr al ’Arabiyah"
139
+ test "بَغداد", "Baghdad"
140
+ test "تُونِس", "Tunis"
141
+ test "حَسّان", "Hassan"
142
+ test "مُحَمَّد", "Muhammad"
143
+ test "القَذَّافِي", "Al Qadhafi"
144
+ test "مُبَشِّر", "Mubashir"
145
+ test "الجَزائِر", "Al Jaza’ir"
146
+ test "عَبدالرَحمَن", "’Abd al Rahman"
147
+ test "هَيْثَم", "Haytham"
148
+ test "فَيْصَل", "Fayṣal"
149
+ test "تَوْفِيق", "Tawfiq"
150
+ test "رَوْضَة", "Rawḍah"
151
+ test "نُورُالدِين", "Nur al Din"
152
+ test "عَبدُاللَّه", "’Abdallah"
153
+ }
154
+
155
+
156
+ stage {
157
+
158
+ # CHARACTERS
159
+ parallel {
160
+
161
+ # Tool used for Unicode finding:
162
+ # https://www.branah.com/unicode-converter
163
+
164
+ # pointing
165
+ sub "\u064e", "a" # َ fatha
166
+ sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
167
+ sub "\u064e", "", after: "a" + any("ht") # َ fatha followed by ta' marboota, handling different order of conversion
168
+ sub "\u0650", "i" # ِ kasra
169
+ sub "\u064f", "u" # ُ damma
170
+ sub "\u0652", "" # ْ sokoon, see note A below
171
+
172
+
173
+ sub "\u0650\u064a", "i" # ـِي kasra followed by ي
174
+ sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
175
+ sub "\u0650\u064a", "iy", after: any(["\u064e", "u064f"]) # ـِي kasra followed by ي
176
+ sub "\u064f\u0648", "u" # ـُو damma followed by و
177
+ sub "\u064e\u0627", "a" # ـَا fatha followed by ا
178
+ sub "\u064e\u0649", "á" # ـَى fatha followed by ى which is ا not ي
179
+ sub "\u064e\u0648\u0652", "aw" # ـَوْ
180
+ sub "\u064e\u064a\u0652", "ay" # ـَيْ
181
+ sub "\u0622", "a" # آ
182
+
183
+ # ta' marboota
184
+ sub "\u0629", "at" # ة in the middle of the sentence
185
+ sub "\u0629" + line_end, "ah"
186
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
187
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
188
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
189
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
190
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
191
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
192
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
193
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
194
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
195
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
196
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
197
+ sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
198
+
199
+ # shadda
200
+
201
+ sub "\u0628\u0651", "bb" # ب
202
+ sub "\u062a\u0651", "tt" # ت
203
+ sub "\u062b\u0651", "th" # ث
204
+ sub "\u062c\u0651", "jj" # ج
205
+ sub "\u062d\u0651", "hh" # ح
206
+ sub "\u062e\u0651", "kh" # خ
207
+ sub "\u062f\u0651", "dd" # د
208
+ sub "\u0630\u0651", "dh" # ذ
209
+ sub "\u0631\u0651", "rr" # ر
210
+ sub "\u0632\u0651", "zz" # ز
211
+ sub "\u0633\u0651", "ss" # س
212
+ sub "\u0634\u0651", "sh" # ش
213
+ sub "\u0635\u0651", "ṣṣ" # ص
214
+ sub "\u0636\u0651", "ḍḍ" # ض
215
+ sub "\u0637\u0651", "ṭṭ" # ط
216
+ sub "\u0638\u0651", "ẓẓ" # ظ
217
+ sub "\u063a\u0651", "gh" # غ
218
+ sub "\u0641\u0651", "ff" # ف
219
+ sub "\u0642\u0651", "qq" # ق
220
+ sub "\u0643\u0651", "kk" # ك
221
+ sub "\u0644\u0651", "ll" # ل
222
+ sub "\u0645\u0651", "mm" # م
223
+ sub "\u0646\u0651", "nn" # ن
224
+ sub "\u0647\u0651", "hh" # ه
225
+ sub "\u0648\u0651", "ww" # و
226
+ sub "\u064a\u0651", "yy" # ي
227
+
228
+ sub "\u0626", "’" # ئ
229
+
230
+ sub boundary + "\u0627\u0644\u0644\u0651\u064e\u0647", "Allah"
231
+
232
+ sub non_word_boundary + maybe("\u064f") + "\u0627\u0644\u0644\u0651\u064e\u0647", "allah"
233
+
234
+ sub "\u0621", any(["’", ""]) # ء
235
+
236
+ sub boundary + "\u0627\u0644", "al " # ال
237
+ sub non_word_boundary + maybe("\u064f") + "\u0627\u0644", " al " # ال in middle of composite name
238
+
239
+ # '\uFE8E' : '' # ﺎ
240
+
241
+
242
+ sub "\u0623", "" # أ
243
+ sub boundary + "\u0627", "" # ا
244
+ sub "\u0627", "a" # ا
245
+ sub "\u0628", "b" # ب
246
+ sub "\u062a", "t" # ت
247
+ sub "\u062b", "th" # ث
248
+ sub "\u062c", "j" # ج
249
+ sub "\u062d", "h" # ح
250
+ sub "\u062e", "kh" # خ
251
+ sub "\u062f", "d" # د
252
+ sub "\u0630", "dh" # ذ
253
+ sub "\u0631", "r" # ر
254
+ sub "\u0632", "z" # ز
255
+ sub "\u0633", "s" # س
256
+ sub "\u0634", "sh" # ش
257
+ sub "\u0635", "ṣ" # ص
258
+ sub "\u0636", "ḍ" # ض
259
+ sub "\u0637", "ṭ" # ط
260
+ sub "\u0638", "ẓ" # ظ
261
+ sub "\u0639", "’" # ع
262
+ sub "\u063a", "gh" # غ
263
+ sub "\u0641", "f" # ف
264
+ sub "\u0642", "q" # ق
265
+ sub "\u0643", "k" # ك
266
+ sub "\u0644", "l" # ل
267
+ sub "\u0645", "m" # م
268
+ sub "\u0646", "n" # ن
269
+ sub "\u0647", "h" # ه
270
+ sub "\u0648", "w" # و
271
+ sub "\u064a", "y" # ي
272
+ }
273
+
274
+ # POSTRULES
275
+ sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
276
+ sub " Al ", " al " # ال
277
+
278
+ # don't capitalize defined article in the middle of a sentence
279
+
280
+
281
+ }
@@ -0,0 +1,158 @@
1
+ metadata {
2
+ authority_id: odni
3
+ id: 2015
4
+ language: iso-639-2:aze
5
+ source_script: Cyrl
6
+ destination_script: Latn
7
+ name: Standards for the transliteration of Azeri personal names in written reports and products
8
+ source: ICS-630-01 Annex P
9
+ creation_date: 2015
10
+ confirmation_date: 2015
11
+ description: |
12
+ This system is the Intelligence Community standard for the transliteration of Azeri person names
13
+ that will be applied to all final written reports and products for IC consumers. It is not
14
+ intended to eliminate variations of a name that can contribute forensic information. Rather, it is
15
+ to provide an IC standard Romanized (English) transliteration from Azeri that can then be linked
16
+ to forensic information in ways that will help identify the referent of the name.
17
+
18
+ In cases where an individual’s name has already been transliterated in a variant spelling, the IC
19
+ Standard spelling should appear first, followed by the variant spelling(s) in parentheses at the
20
+ first usage. In addition, if the original Cyrillic-script spelling is known, that spelling should
21
+ also appear in parentheses following the name, if possible, following best practices of the
22
+ issuing organization and taking into consideration information system capabilities. For example:
23
+ Rashad Sadykhov (also seen as Rashad Sadigov, Рашад Садыхов). This convention is designed to
24
+ ensure that vital forensic information is not lost.
25
+
26
+ For names of persons who are known to not be part of the Azeri-speaking community, use the
27
+ relevant IC transliteration standard for names from that language (e.g., Yitzhak). A translator’s
28
+ note may be used to clarify the known origin of the person. Spell names of individuals from
29
+ languages that are written in Roman letters as they are spelled in those languages (e.g.,
30
+ George Clooney, Jorge Garcia, Georges Pompidou).
31
+
32
+ In the case of active senior government officials in the on-line CIA World Factbook and the on-
33
+ line directory of Chiefs of State and Cabinet Members of Foreign Governments, the spellings given
34
+ in these on-line reference works should be used in place of the IC Standard. For any individual
35
+ who has at one time been listed in the Factbook or Chiefs of State directory but who no longer
36
+ appears in those resources (i.e. is no longer a government official), the IC Standard spelling
37
+ should appear first, with the spelling, if known, as it previously appeared in those resources
38
+ listed within parentheses at the first usage.
39
+
40
+ The primary goal is to produce a consistent Romanized transcription of names that is specifically
41
+ readable to the English-speaking non-specialist. The system uses the 26 letters of the standard
42
+ (English) Roman alphabet. Some ambiguities in the Romanized form will occur without the use of
43
+ diacritics. However, within the context of a report, where additional information about the
44
+ individual is provided, the referent will be clearly identified. This system will be used in
45
+ conjunction with on-line tools, name dictionaries, and lists containing conventional spellings of
46
+ names of well-known individuals.
47
+
48
+ notes:
49
+ - Transliterate double digraphs as a single digraph, i.e. шш -> sh, not shsh
50
+ - In the Roman, no distinction is made between digraphs such as 'sh' and single contiguous letters,
51
+ (e.g. 's' followed by 'h').
52
+ - The Cyrillic ъ and ь are not transliterated, but instead are left out of the transliteration.
53
+ }
54
+
55
+ tests { # regression sample: the Cyrillic/Roman name pair quoted in the metadata description
56
+ test "Рашад Садыхов", "Rashad Sadykhov"
57
+ }
58
+
59
+ stage {
60
+
61
+ # RULES
62
+ # note[1]
63
+ # generated with: "ҒЁЖХЧШЩЮЯ".split("").each { |i| puts %{sub capture(any("#{i}#{i.downcase}"))+any("#{i}#{i.downcase}"), ref(1)} }
64
+ sub capture(any("Ғғ"))+any("Ғғ"), ref(1)
65
+ sub capture(any("Ёё"))+any("Ёё"), ref(1)
66
+ sub capture(any("Жж"))+any("Жж"), ref(1)
67
+ sub capture(any("Хх"))+any("Хх"), ref(1)
68
+ sub capture(any("Чч"))+any("Чч"), ref(1)
69
+ sub capture(any("Шш"))+any("Шш"), ref(1)
70
+ sub capture(any("Щщ"))+any("Щщ"), ref(1)
71
+ sub capture(any("Юю"))+any("Юю"), ref(1)
72
+ sub capture(any("Яя"))+any("Яя"), ref(1)
73
+ # note[3]
74
+ sub any("\u044A\u044C"), none
75
+
76
+
77
+ # CHARACTERS: per-letter Cyrillic -> Roman substitutions; uppercase letters first, then lowercase
78
+ parallel { # several letters share one Roman output (e.g. А/Ә -> A, Г/Ҝ -> G); NOTE(review): presumably these rules are applied in one pass rather than sequentially — confirm against the Interscript DSL docs
79
+ sub "\u0410", "A" # А
80
+ sub "\u0411", "B" # Б
81
+ sub "\u0412", "V" # В
82
+ sub "\u0413", "G" # Г
83
+ sub "\u049C", "G" # Ҝ
84
+ sub "\u0492", "Gh" # Ғ
85
+ sub "\u0414", "D" # Д
86
+ sub "\u0415", "E" # Е
87
+ sub "\u0401", "Yo" # Ё
88
+ sub "\u04D8", "A" # Ә
89
+ sub "\u0416", "Zh" # Ж
90
+ sub "\u0417", "Z" # З
91
+ sub "\u0418", "I" # И
92
+ sub "\u0419", "Y" # Й
93
+ sub "\u0408", "Y" # Ј
94
+ sub "\u041A", "K" # К
95
+ sub "\u041B", "L" # Л
96
+ sub "\u041C", "M" # М
97
+ sub "\u041D", "N" # Н
98
+ sub "\u041E", "O" # О
99
+ sub "\u04E8", "O" # Ө
100
+ sub "\u041F", "P" # П
101
+ sub "\u0420", "R" # Р
102
+ sub "\u0421", "S" # С
103
+ sub "\u0422", "T" # Т
104
+ sub "\u0423", "U" # У
105
+ sub "\u04AE", "U" # Ү
106
+ sub "\u0424", "F" # Ф
107
+ sub "\u0425", "Kh" # Х
108
+ sub "\u04BA", "H" # Һ
109
+ sub "\u0427", "Ch" # Ч
110
+ sub "\u04B8", "J" # Ҹ
111
+ sub "\u0428", "Sh" # Ш
112
+ sub "\u0429", "Shch" # Щ
113
+ sub "\u042B", "Y" # Ы
114
+ sub "\u042D", "E" # Э
115
+ sub "\u042E", "Yu" # Ю
116
+ sub "\u042F", "Ya" # Я
117
+
118
+ sub "\u0430", "a" # а
119
+ sub "\u0431", "b" # б
120
+ sub "\u0432", "v" # в
121
+ sub "\u0433", "g" # г
122
+ sub "\u049D", "g" # ҝ
123
+ sub "\u0493", "gh" # ғ
124
+ sub "\u0434", "d" # д
125
+ sub "\u0435", "e" # е
126
+ sub "\u0451", "yo" # ё
127
+ sub "\u04D9", "a" # ә
128
+ sub "\u0436", "zh" # ж
129
+ sub "\u0437", "z" # з
130
+ sub "\u0438", "i" # и
131
+ sub "\u0439", "y" # й
132
+ sub "\u0458", "y" # ј
133
+ sub "\u043A", "k" # к
134
+ sub "\u043B", "l" # л
135
+ sub "\u043C", "m" # м
136
+ sub "\u043D", "n" # н
137
+ sub "\u043E", "o" # о
138
+ sub "\u04E9", "o" # ө
139
+ sub "\u043F", "p" # п
140
+ sub "\u0440", "r" # р
141
+ sub "\u0441", "s" # с
142
+ sub "\u0442", "t" # т
143
+ sub "\u0443", "u" # у
144
+ sub "\u04AF", "u" # ү
145
+ sub "\u0444", "f" # ф
146
+ sub "\u0445", "kh" # х
147
+ sub "\u04BB", "h" # һ
148
+ sub "\u0447", "ch" # ч
149
+ sub "\u04B9", "j" # ҹ
150
+ sub "\u0448", "sh" # ш
151
+ sub "\u0449", "shch" # щ
152
+ sub "\u044B", "y" # ы
153
+ sub "\u044D", "e" # э
154
+ sub "\u044E", "yu" # ю
155
+ sub "\u044F", "ya" # я
156
+ }
157
+
158
+ }