interscript 0.1.3 → 0.1.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (294) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +10 -11
  3. data/aliases.json +1 -0
  4. data/lib/interscript.rb +62 -59
  5. data/lib/interscript/command.rb +3 -2
  6. data/lib/interscript/fs.rb +96 -0
  7. data/lib/interscript/mapping.rb +36 -17
  8. data/lib/interscript/opal.rb +196 -0
  9. data/lib/interscript/opal/entrypoint.rb +20 -0
  10. data/lib/interscript/opal/exports.rb +11 -0
  11. data/lib/interscript/opal/maps.js.erb +8 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/maps/acadsin-zho-Hani-Latn-2002.yaml +6 -2
  14. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  15. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  16. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  17. data/maps/alalc-asm-Deva-Latn-1997.yaml +259 -0
  18. data/maps/alalc-asm-Deva-Latn-2012.yaml +55 -0
  19. data/maps/alalc-aze-Arab-Latn-1997.yaml +376 -0
  20. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  21. data/maps/{alalc-bel-cyrl-latn-1997.yaml → alalc-bel-Cyrl-Latn-1997.yaml} +7 -3
  22. data/maps/alalc-ben-Beng-Latn-1997.yaml +291 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +1 -1
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +5 -1
  25. data/maps/alalc-div-Thaa-Latn-1997.yaml +211 -0
  26. data/maps/alalc-ell-Grek-Latn-1997.yaml +7 -4
  27. data/maps/alalc-ell-Grek-Latn-2010.yaml +3 -5
  28. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  29. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  30. data/maps/alalc-hin-Deva-Latn-1997.yaml +303 -0
  31. data/maps/alalc-hin-Deva-Latn-2011.yaml +65 -0
  32. data/maps/alalc-kan-Kana-Latn-1997.yaml +274 -0
  33. data/maps/alalc-kan-Kana-Latn-2011.yaml +63 -0
  34. data/maps/alalc-kat-Geok-Latn-1997.yaml +2 -3
  35. data/maps/alalc-kat-Geor-Latn-1997.yaml +5 -1
  36. data/maps/alalc-kor-Hang-Latn-1997.yaml +6 -2
  37. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  38. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  39. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  40. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  41. data/maps/{alalc-mkd-cyrl-latn-1997.yaml → alalc-mkd-Cyrl-Latn-1997.yaml} +1 -1
  42. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +1 -1
  43. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  44. data/maps/alalc-ori-Orya-Latn-1997.yaml +284 -0
  45. data/maps/alalc-ori-Orya-Latn-2011.yaml +67 -0
  46. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  47. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  48. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  49. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  50. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  51. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  52. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  53. data/maps/alalc-san-Deva-Latn-2012.yaml +241 -0
  54. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  55. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  56. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +5 -1
  57. data/maps/{alalc-srp-cyrl-latn-2013.yaml → alalc-srp-Cyrl-Latn-2013.yaml} +1 -1
  58. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  59. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  60. data/maps/alalc-tel-Telu-Latn-1997.yaml +284 -0
  61. data/maps/alalc-tel-Telu-Latn-2011.yaml +64 -0
  62. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +5 -1
  63. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +1 -1
  64. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +1 -1
  65. data/maps/az-aze-Cyrl-Latn-1939.yaml +105 -0
  66. data/maps/az-aze-Cyrl-Latn-1958.yaml +45 -0
  67. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +1 -2
  68. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +1 -1
  69. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +1 -3
  70. data/maps/bgn-kor-Hang-Latn-1943.yaml +8 -4
  71. data/maps/bgn-kor-Kore-Latn-1943.yaml +4 -4
  72. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +1 -1
  73. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +1 -1
  74. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  75. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +598 -0
  76. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +1 -1
  77. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +111 -104
  78. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  79. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +329 -0
  80. data/maps/{bgnpcgn-bel-cyrl-latn-1979.yaml → bgnpcgn-bel-Cyrl-Latn-1979.yaml} +5 -1
  81. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +14 -10
  82. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +5 -1
  83. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  84. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +75 -0
  85. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +8 -5
  86. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +5 -2
  87. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +28 -0
  88. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  89. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +37 -0
  90. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +1 -1
  91. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +5 -1
  92. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +1 -2
  93. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +247 -0
  94. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +218 -0
  95. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +18 -18
  96. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +3 -3
  97. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +3 -3
  98. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +249 -0
  99. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +5 -1
  100. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +2 -2
  101. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  102. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  103. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +338 -0
  104. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +673 -0
  105. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  106. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +377 -0
  107. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  108. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +5 -1
  109. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +73 -0
  110. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +5 -1
  111. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  112. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  113. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +5 -2
  114. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +3 -92
  115. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +459 -0
  116. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  117. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  118. data/maps/{bgnpcgn-chn-Hans-Latn-1979.yaml → bgnpcgn-zho-Hans-Latn-1979.yaml} +1 -1
  119. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  120. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  121. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  122. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  123. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +173 -0
  124. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  125. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +175 -0
  126. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  127. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  128. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  129. data/maps/by-bel-Cyrl-Latn-1998.yaml +9 -5
  130. data/maps/by-bel-Cyrl-Latn-2007.yaml +4 -4
  131. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  132. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  133. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  134. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  135. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  136. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  137. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  138. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  139. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  140. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  141. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +7 -8
  142. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +6 -7
  143. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +2 -3
  144. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +2 -3
  145. data/maps/ggg-kat-Geor-Latn-2002.yaml +5 -2
  146. data/maps/{gki-bel-cyrl-latn-1992.yaml → gki-bel-Cyrl-Latn-1992.yaml} +2 -2
  147. data/maps/{gki-bel-cyrl-latn-2000.yaml → gki-bel-Cyrl-Latn-2000.yaml} +2 -2
  148. data/maps/{gost-rus-cyrl-latn-16876-71-1983.yaml → gost-rus-Cyrl-Latn-16876-71-1983.yaml} +8 -4
  149. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  150. data/maps/hk-yue-Hani-Latn-1888.yaml +1 -1
  151. data/maps/icao-bel-Cyrl-Latn-9303.yaml +1 -6
  152. data/maps/icao-bul-Cyrl-Latn-9303.yaml +1 -5
  153. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +1 -2
  154. data/maps/icao-heb-Hebr-Latn-9303.yaml +1 -1
  155. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +1 -1
  156. data/maps/icao-rus-Cyrl-Latn-9303.yaml +1 -2
  157. data/maps/icao-srp-Cyrl-Latn-9303.yaml +1 -1
  158. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +1 -2
  159. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  160. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  161. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  162. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +11 -8
  163. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +8 -5
  164. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  165. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  166. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  167. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +10 -6
  168. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +220 -0
  169. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  170. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  171. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  172. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  173. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  174. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  175. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  176. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  177. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  178. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  179. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  180. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  181. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  182. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  183. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +3 -4
  184. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  185. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  186. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  187. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +2 -2
  188. data/maps/kp-kor-Hang-Latn-2002.yaml +29 -21
  189. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +3 -3
  190. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  191. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  192. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +1 -1
  193. data/maps/moct-kor-Hang-Latn-2000.yaml +6 -2
  194. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +1 -1
  195. data/maps/mv-div-Thaa-Latn-1987.yaml +200 -0
  196. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  197. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  198. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  199. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  200. data/maps/odni-ara-Arab-Latn-2004.yaml +137 -0
  201. data/maps/odni-ara-Arab-Latn-2015.yaml +315 -0
  202. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  203. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  204. data/maps/odni-bul-Cyrl-Latn-2005.yaml +90 -0
  205. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  206. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  207. data/maps/odni-fas-Arab-Latn-2004.yaml +276 -0
  208. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  209. data/maps/odni-hin-Deva-Latn-2004.yaml +182 -0
  210. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  211. data/maps/odni-kat-Geor-Latn-2015.yaml +2 -3
  212. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  213. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  214. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  215. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +21 -0
  216. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  217. data/maps/odni-prs-Arab-Latn-2004.yaml +123 -0
  218. data/maps/odni-prs-Arab-Latn-2015.yaml +228 -0
  219. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  220. data/maps/odni-srp-Cyrl-Latn-2005.yaml +36 -0
  221. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  222. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  223. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  224. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +170 -0
  225. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  226. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +6 -2
  227. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  228. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  229. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +5 -5
  230. data/maps/royin-tha-Thai-Latn-1968.yaml +9 -5
  231. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +5 -5
  232. data/maps/royin-tha-Thai-Latn-1999.yaml +8 -4
  233. data/maps/sac-zho-Hans-Latn-1979.yaml +5 -1
  234. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  235. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  236. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  237. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +1 -1
  238. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +6 -2
  239. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  240. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  241. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  242. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  243. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  244. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  245. data/maps/un-asm-Beng-Latn-1972.yaml +223 -0
  246. data/maps/un-bel-Cyrl-Latn-2007.yaml +4 -4
  247. data/maps/un-ben-Beng-Latn-2016.yaml +1 -1
  248. data/maps/{un-ell-Grek-Latn-phonetic-1987.yaml → un-ell-Grek-Latn-1987-phonetic.yaml} +44 -44
  249. data/maps/un-ell-Grek-Latn-1987-tl.yaml +3 -4
  250. data/maps/un-ell-Grek-Latn-1987-ts.yaml +3 -4
  251. data/maps/un-guj-Gujr-Latn-1972.yaml +229 -0
  252. data/maps/un-hin-Deva-Latn-2016.yaml +316 -0
  253. data/maps/un-kan-Kana-Latn-2016.yaml +254 -0
  254. data/maps/un-mal-Mlym-Latn-1972.yaml +251 -0
  255. data/maps/un-mar-Deva-Latn-2016.yaml +102 -0
  256. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  257. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  258. data/maps/un-nep-Deva-Latn-1972.yaml +269 -0
  259. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  260. data/maps/un-ori-Orya-Latn-1972.yaml +247 -0
  261. data/maps/un-pan-Guru-Latn-1972.yaml +402 -0
  262. data/maps/un-prs-Arab-Latn-1967.yaml +236 -0
  263. data/maps/un-rus-Cyrl-Latn-1987.yaml +2 -2
  264. data/maps/un-tam-Taml-Latn-1972.yaml +194 -0
  265. data/maps/un-tel-Telu-Latn-1972.yaml +270 -0
  266. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  267. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  268. data/maps/un-urd-Arab-Latn-1972.yaml +405 -0
  269. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +466 -0
  270. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +76 -0
  271. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  272. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +1 -1
  273. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +1 -1
  274. data/maps/{nil-kor-Hang-Hang-jamo.yaml → var-kor-Hang-Hang-jamo.yaml} +4 -4
  275. data/maps/var-kor-Hang-Latn-mr-1939.yaml +4 -4
  276. data/maps/var-kor-Kore-Hang-2013.yaml +2 -2
  277. data/maps/var-kor-Kore-Latn-mr-1939.yaml +2 -3
  278. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  279. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  280. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  281. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  282. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  283. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  284. data/maps/var-tha-Thai-Thai-phonemic.yaml +6 -6
  285. data/maps/var-tha-Thai-Zsym-ipa.yaml +13 -13
  286. data/maps/{var-zho-Hani-Latn-1979.yaml → var-zho-Hani-Latn-wd-1979.yaml} +13 -9
  287. data/spec/interscript/filenames_spec.rb +21 -0
  288. data/spec/interscript_spec.rb +16 -5
  289. metadata +275 -27
  290. data/bin/interscript +0 -41
  291. data/bin/rspec +0 -29
  292. data/bin/setup +0 -8
  293. data/maps/un-mon-Mong-Latn-2013.yaml +0 -93
  294. data/maps/un-ukr-cyrl-latn-1998.yaml +0 -30
@@ -0,0 +1,259 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:asm
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Assamese Romanization, 1997
8
+ alias:
9
+ ogc11122:
10
+ code: asm_Deva2Latn_ALA_1997
11
+ description: Assamese ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/assamese.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Assamese
16
+
17
+ notes:
18
+
19
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
20
+ vowels following a consonant can be found in grammars; no distinction between the two is
21
+ made in transliteration.
22
+
23
+ - |
24
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
25
+ transliteration, with the following exceptions:
26
+
27
+ a) when another vowel is indicated by its appropriate sign; and
28
+ b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
29
+ birāma.
30
+
31
+ - Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
32
+ labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
33
+
34
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
35
+
36
+ tests:
37
+ - source: "অসমীয়া কবিতা"
38
+ expected: "asamīẏā kabitā"
39
+ - source: "কবিৰ আজি জন্মদিন"
40
+ expected: "kabira āji janmadina"
41
+ - source: "বেৰুটত এমাহৰ পাছতে পুনৰ ভয়ংকৰ অগ্নিকাণ্ড"
42
+ expected: "beruṭata emāhara pāchate punara bhayaṃkara agnikāṇḍa"
43
+ - source: "ভঙাৰ বিৰুদ্ধে আৱেদন দাখিল কংগনাৰ"
44
+ expected: "bhaṅāra biruddhe āwedana dākhila kaṃganāra"
45
+ - source: "আপুনি পঢ়ি ভাল পাব পৰা বাতৰি"
46
+ expected: "āpuni paṛhi bhāla pāba parā bātari"
47
+ - source: "শ্ৰীৰামপুৰত গৰুভৰ্তি ট্ৰাক জব্দ, দুজনক আটক"
48
+ expected: "śrīrāmapurata garubharti ṭrāka jabda, dujanaka āṭaka"
49
+ - source: "কেনে আছে প্ৰাক্তন"
50
+ expected: "kene āche prāktana"
51
+ - source: "কমুম্বাইৰ মেয়ৰৰ দেহত কোভিড পজিটিভ"
52
+ expected: "kamumbāira meẏarara dehata kobhiḍa pajiṭibha"
53
+ - source: "টুইটাৰযোগে খোদ সদৰী কৰে এই কথা"
54
+ expected: "ṭuiṭāraẏoge khoda sadarī kare ei kathā"
55
+ - source: "লখিমপুৰ জিলাৰ নাৰায়ণপুৰৰ বৰপথাৰত আজি প্ৰশান্তি ধাম নামেৰে এখন বৃদ্ধাশ্ৰমৰ শুভাৰম্ভ কৰা হয়"
56
+ expected: "lakhimapura jilāra nārāẏaṇapurara barapathārata āji praśānti dhāma nāmere ekhana bṛddhāśramara śubhārambha karā haẏa"
57
+
58
+
59
+ map:
60
+
61
+ rules:
62
+ # note[2]
63
+ - pattern: (ক=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
64
+ result: 'k'
65
+ - pattern: (খ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
66
+ result: 'kh'
67
+ - pattern: (গ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
68
+ result: 'g'
69
+ - pattern: (ঘ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
70
+ result: 'gh'
71
+ - pattern: (ঙ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
72
+ result: 'ṅ'
73
+ - pattern: (চ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
74
+ result: 'c'
75
+ - pattern: (ছ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
76
+ result: 'ch'
77
+ - pattern: (জ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
78
+ result: 'j'
79
+ - pattern: (ঝ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
80
+ result: 'jh'
81
+ - pattern: (ঞ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
82
+ result: 'ñ'
83
+ - pattern: (ট=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
84
+ result: 'ṭ'
85
+ - pattern: (ঠ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
86
+ result: 'ṭh'
87
+ - pattern: (ড=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
88
+ result: 'ḍ'
89
+ - pattern: (ড়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
90
+ result: 'ṛ'
91
+ - pattern: (ঢ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
92
+ result: 'ḍh'
93
+ - pattern: (ঢ়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
94
+ result: 'ṛh'
95
+ - pattern: (ণ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
96
+ result: 'ṇ'
97
+ - pattern: (ত=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
98
+ result: 't'
99
+ - pattern: (ৎ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
100
+ result: 'ṭ'
101
+ - pattern: (থ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
102
+ result: 'th'
103
+ - pattern: (দ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
104
+ result: 'd'
105
+ - pattern: (ধ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
106
+ result: 'dh'
107
+ - pattern: (ন=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
108
+ result: 'n'
109
+ - pattern: (প=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
110
+ result: 'p'
111
+ - pattern: (ফ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
112
+ result: 'ph'
113
+ - pattern: (ব=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
114
+ result: 'b'
115
+ - pattern: (ভ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
116
+ result: 'bh'
117
+ - pattern: (ম=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
118
+ result: 'm'
119
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
120
+ result: 'y'
121
+ - pattern: (য=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
122
+ result: 'ẏ'
123
+ - pattern: (য়=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
124
+ result: 'ẏ'
125
+ - pattern: (ৰ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
126
+ result: 'r'
127
+ - pattern: (ল=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
128
+ result: 'l'
129
+ - pattern: (ৱ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
130
+ result: 'w'
131
+ - pattern: (শ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
132
+ result: 'ś'
133
+ - pattern: (ষ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
134
+ result: 'sh'
135
+ - pattern: (স=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
136
+ result: 's'
137
+ - pattern: (হ=?)(?=[\u09be\u09bf\u09c0\u09c1\u09c2\u09c3\u09c7\u09c8\u09cb\u09cc\u094d\u09cd])
138
+ result: 'h'
139
+ # note[3]
140
+ - pattern: \u0981(?=[কখগঘঙচছজঝঞটঠডড়ঢঢ়ণতৎথদধন]) # ঁ before guttural, palatal, cerebral, and dental
141
+ result: n̐
142
+
143
+ characters:
144
+
145
+ #Vowels and Diphthongs (see Note 1)
146
+
147
+ 'অ': 'a'
148
+ 'আ': 'ā'
149
+ 'ই': 'i'
150
+ 'ঈ': 'ī'
151
+ 'উ': 'u'
152
+ 'ঊ': 'ū'
153
+ 'ঋ': 'ṛ'
154
+ 'ৠ': 'ṝ'
155
+ 'ঌ': 'ḹ'
156
+ 'এ': 'e'
157
+ 'ঐ': 'ai'
158
+ 'ও': 'o'
159
+ 'ঔ': 'au'
160
+
161
+ # Consonant characters
162
+
163
+ #Gutturals
164
+ 'ক': 'ka'
165
+ 'খ': 'kha'
166
+ 'গ': 'ga'
167
+ 'ঘ': 'gha'
168
+ 'ঙ': 'ṅa'
169
+
170
+ #Palatals
171
+ 'চ': 'ca'
172
+ 'ছ': 'cha'
173
+ 'জ': 'ja'
174
+ 'ঝ': 'jha'
175
+ 'ঞ': 'ña'
176
+
177
+ #Cerebrals
178
+ 'ট': 'ṭa'
179
+ 'ঠ': 'ṭha'
180
+ 'ড': 'ḍa'
181
+ 'ড়': 'ṛa'
182
+ 'ঢ': 'ḍha'
183
+ 'ঢ়': 'ṛha'
184
+ 'ণ': 'ṇa'
185
+
186
+ #Dentals
187
+ 'ত': 'ta'
188
+ 'ৎ': 'ṭ'
189
+ 'থ': 'tha'
190
+ 'দ': 'da'
191
+ 'ধ': 'dha'
192
+ 'ন': 'na'
193
+
194
+ #Labials
195
+ 'প': 'pa'
196
+ 'ফ': 'pha'
197
+ 'ব': 'ba'
198
+ 'ভ': 'bha'
199
+ 'ম': 'ma'
200
+
201
+ #Semivowels
202
+ 'য়': 'ya'
203
+ 'য': 'ẏa'
204
+ 'য়': 'ẏa'
205
+ 'ৰ': 'ra'
206
+ 'ল': 'la'
207
+ 'ৱ': 'wa'
208
+
209
+ #Sibilants
210
+ 'শ': 'śa'
211
+ 'ষ': 'sha'
212
+ 'স': 'sa'
213
+
214
+ #Aspirate
215
+ 'হ': 'ha'
216
+
217
+
218
+ # Anusvāra
219
+ 'ং': 'ṃ'
220
+
221
+ # Bisarga
222
+ 'ঃ': 'ḥ'
223
+
224
+ # Candrabindu (see Note 3)
225
+ 'ঁ': 'm̐'
226
+
227
+ # Abagraha (see Note 4)
228
+ 'ऽ': '’' # (apostrophe)
229
+
230
+ # Medials # Needed for connecting constants
231
+ '\u09be': 'ā'
232
+ '\u09bf': 'i'
233
+ '\u09c0': 'ī'
234
+ '\u09c1': 'u'
235
+ '\u09c2': 'ū'
236
+ '\u09c3': 'ṛ'
237
+ '\u09c7': 'e'
238
+ '\u09c8': 'ai'
239
+ '\u09cb': 'o'
240
+ '\u09cc': 'au'
241
+ '।': '.'
242
+ '्': ''
243
+ '\u09CD': '' # Used for joining
244
+
245
+ # Digits
246
+
247
+ '১': '1'
248
+ '২': '2'
249
+ '৩': '3'
250
+ '৪': '4'
251
+ '৫': '5'
252
+ '৬': '6'
253
+ '৭': '7'
254
+ '৮': '8'
255
+ '৯': '9'
256
+ '০': '0'
257
+
258
+
259
+
@@ -0,0 +1,55 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2012
4
+ language: iso-639-2:asm
5
+ source_script: Deva
6
+ destination_script: Latn
7
+ name: Assamese Romanization, 2012
8
+ url: https://www.loc.gov/catdir/cpso/romanization/assamese.pdf
9
+ creation_date: 2012
10
+ description: |
11
+ ALA-LC Romanization table for Assamese
12
+
13
+ notes:
14
+
15
+ - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
16
+ vowels following a consonant can be found in grammars; no distinction between the two is
17
+ made in transliteration.
18
+
19
+ - |
20
+ The vowel a is implicit after all consonants and consonant clusters and is supplied in
21
+ transliteration, with the following exceptions:
22
+
23
+ a) when another vowel is indicated by its appropriate sign; and
24
+ b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
25
+ birāma.
26
+
27
+ - Candrabindu before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
28
+ labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.
29
+
30
+ - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).
31
+
32
+ tests:
33
+ - source: "ৰাজ্যিক স্বাস্থ্য মন্ত্ৰী পীয়ুষ হাজৰিকাৰ বিৰুদ্ধে দাখিল কৰা হৈছে এজাহাৰ।"
34
+ expected: "rājẏika sbāsthẏa mantrī pīyusha hājarikāra biruddhe dākhila karā haiche ejāhāra."
35
+ - source: "কোৰোনা মহামাৰীৰ এই সময়ত সভাখনত হাজাৰ হাজাৰ লোকে মাস্ক পৰিধান নকৰাৰ লগতে সামাজিক দূৰত্ব নমনাৰ অভিযোগ উত্থাপন কৰা হৈছে"
36
+ expected: "koronā mahāmārīra ei samayata sabhākhanata hājāra hājāra loke māska paridhāna nakarāra lagate sāmājika dūratba namanāra abhiẏoga utthāpana karā haiche"
37
+ - source: "হাওৰাঘাটৰ গ্ৰামীণ বিকাশ বেংক হিতাধিকাৰীৰ পৰা উৎকোচ লৈ গ্ৰেপ্তাৰ বিজেপি কৰ্মী যীচু কেম্পাই"
38
+ expected: "hāorāghāṭara grāmīṇa bikāśa beṃka hitādhikārīra parā uṭkoca lai greptāra bijepi karmī ẏīcu kempāi"
39
+ - source: "জ্যেষ্ঠ সাংবাদিক পৰাগ ভূঞাৰ মৃত্যুক লৈ তদন্ত আৰম্ভ চিআইডিৰ"
40
+ expected: "jẏeshṭha sāṃbādika parāga bhūñāra mṛtẏuka lai tadanta ārambha ciāiḍira"
41
+ - source: "সাংবাদিক পৰাগ ভূঞাৰ মৃত্যুৰ উচিত তদন্তৰ দাবীত নলবাৰীত অৱস্থান ধৰ্মঘট"
42
+ expected: "sāṃbādika parāga bhūñāra mṛtẏura ucita tadantara dābīta nalabārīta awasthāna dharmaghaṭa"
43
+ - source: "দৰঙৰ বিভিন্ন অঞ্চলত মানসিক ৰোগৰ সজাগতামূলক বাটৰ নাট প্ৰদৰ্শন"
44
+ expected: "daraṅara bibhinna añcalata mānasika rogara sajāgatāmūlaka bāṭara nāṭa pradarśana"
45
+ - source: "অযোধ্যাত দীপাৱলীঃ ৫.৮৬ লাখ মাটি চাকি জ্বলাই গঢ়িলে গিনিজ ৱ’ৰ্ল্ড ৰেকৰ্ড"
46
+ expected: "aẏodhẏāta dīpāwalīḥ 5.86 lākha māṭi cāki jbalāi gaḍha়ile ginija wa’rlḍa rekarḍa"
47
+ - source: "ৰাজ্যত আকৌ ২৩৩ জন কোভিড পজিটিভ, সুস্থ হৈছে ৬৪২ জন"
48
+ expected: "rājẏata ākau 233 jana kobhiḍa pajiṭibha, sustha haiche 642 jana"
49
+ - source: "এতিয়ালৈকে ৰাজ্যত এই ভাইৰাছত আক্ৰান্ত লোকৰ সংখ্যা ২১০০৬৮জনলৈ পাইছে বৃদ্ধি।"
50
+ expected: "etiyālaike rājẏata ei bhāirāchata ākrānta lokara saṃkhẏā 210068janalai pāiche bṛddhi."
51
+ - source: "এতিয়ালৈকে ৰাজ্যত কোৰোনাত আক্ৰান্ত হৈ ৯৫৮জন লোক হেৰুৱাইছে প্ৰাণ।"
52
+ expected: "etiyālaike rājẏata koronāta ākrānta hai 958jana loka heruwāiche prāṇa."
53
+ map:
54
+
55
+ inherit: "alalc-asm-Deva-Latn-1997"
@@ -0,0 +1,376 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:aze
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- azerbaij (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: aze_Arab2Latn_ALA_1997
11
+ description: Arabic ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/azerbaij.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Azerbaijani (Arabic script)
16
+
17
+ notes:
18
+ - 1. As seen in the examples above, vowel harmony, which is
19
+ found in Modern Turkish, applies to Azerbaijani as well.
20
+
21
+ - 2. The letter ى in final position may represent the long
22
+ vowel romanized á, in addition to the
23
+ vowels romanized i and ī. This occurs in Arabic names, such
24
+ as Mustafá
25
+ al-Musanná
26
+
27
+ - 3. Vowel points are used sparingly in Azerbaijani
28
+ publications. For romanization, they must be
29
+ supplied from a dictionary.
30
+
31
+ - Rule 1 ء hamza
32
+ (a) When initial, ء is not represented in romanization
33
+ üzdah أوزدة
34
+
35
+ (b) When medial or final in words of Perso-Arabic origin, ء is
36
+ romanized as ’ (alif), except when it accompanies the
37
+ phonetic sound e (as in men), in which case it is romanized
38
+ by e.
39
+ mas’alah مَسئَلة
40
+ gecah كئجة
41
+ necah نئجة
42
+
43
+ - Rule 2 ˜ (maddah)
44
+
45
+ (a) Initial آ is romanized ā.
46
+
47
+ ādām آدام
48
+
49
+ (b) Medial آ, when it represents the phonetic combination ’ā, is so romanized.
50
+
51
+ Heydar'ãbãd حيدَرآبَاد
52
+
53
+ - Rule 3 ّ
54
+ (shaddah or tashdid) is represented by doubling the letter or digraph concerned.
55
+ sãqqãl سَاقَّال
56
+
57
+ Note the exceptional case where ّ is written over و and ي to represent
58
+ the combination of long vowel plus consonants.
59
+ madaníyat مَدَنِيَّت
60
+
61
+ - |
62
+ Rule 4 Tanvīn (written form ٌ, ً (ًا), or ٍ ) which occurs chiefly in Arabic words,
63
+ is romanized un, in, an, and an, respectively.
64
+
65
+ (a) When it occurs in indefinite nouns derived from defective roots.
66
+
67
+ qāḍin قاضٍ
68
+ ma‘nan معنىً
69
+
70
+ (b) When it indicates the adverbial use of a noun or adjective.
71
+
72
+ ṭab‘an طبعًا
73
+ faj’atan فجأةً
74
+ al-Mushtarik waḍ‘an المشترك وضعاً
75
+ wa-al-muftariq ṣuq‘an والمفترق صقعاً
76
+
77
+ - ة in a word in the construct state is romanized t. See rule 7(b).
78
+
79
+ - The consonant letter ة at the end of Arabic words in the
80
+ genitive construction (izãfah) is romanized by t.
81
+
82
+ takmilat al-axbãr تَكمِلَة الأخبَار
83
+
84
+ # Grammatical Structure as It Affects Romanization
85
+ - Rule 6 izãfah. When two Persian words are used in an Azerbaijani
86
+ context in a relationship known as izãfah, the first word (
87
+ the muzãf) is followed by an additional letter or syllable
88
+ in romanization. This is added according to the following
89
+ rules
90
+ (a) When the muzaf bears no special mark of izãfah, it is
91
+ followed by -i.
92
+ Sazman-i tabligãti-Islãm سازمان تبليغات اسلامي
93
+
94
+ (b) When the muzãf is marked by the addition of ء, it is followed by -'i.
95
+ Nãbigah-'i dahr نابغة دَهر
96
+
97
+ (c) When the muzãf is marked by the addition of ى, it is followed by -yi.
98
+ darya-yi nur دَريَاي نُور
99
+
100
+ (d) izãfah is represented in romanization of personal names only when
101
+ implied in the Persian script.
102
+ Mucír-i Beylaqãní مَجير بيلقاني
103
+ Maktabí-i Sírãzí مكتبي شيرازي
104
+
105
+ # Affixes and Compounds
106
+ - Rule 7 Affixes.
107
+ (a) When the affix and the word with which it is connected grammatically are
108
+ written separately in Azerbaijani, the two are separated in romanization
109
+ by a single prime(').
110
+
111
+ (b) The Arabic article al is separated by a hyphen, in romanization,
112
+ from the word to which it is prefixed.
113
+
114
+ - Rule 8 Compounds.
115
+ When the elements of a compound (except a compound personal name)
116
+ are written separately in Azerbaijani, they are separated in
117
+ romanization by a single prime(').
118
+ # Orthography of Azerbaijani in Romanization
119
+
120
+ - Rule 9 Capitalization
121
+
122
+ (a) Rules for the capitalization of English are followed, except that
123
+ the Arabic article al, is lower cased in all positions.
124
+
125
+ (b) Diacritics are used with both upper and lower case letters in romanization.
126
+
127
+ - Rule 10 Foreign words.
128
+ Foreign words in an Azerbaijani context, including Persian and Arabic words,
129
+ are romanized according to the rules for Azerbaijani. For short vowels not
130
+ indicated in the script, the Azerbaijani vowels nearest the original
131
+ pronunciation of the word are supplied in romanization.
132
+
133
+ tests:
134
+ - source: بَرَكَت
135
+ expected: Barakat
136
+
137
+ - source: سَاحِل
138
+ expected: Sāḥil
139
+
140
+ - source: بَادِمجَان
141
+ expected: Bādimcān
142
+
143
+ - source: قُدرَت
144
+ expected: Qudrat
145
+
146
+ - source: بُوغَا
147
+ expected: Būğā
148
+
149
+ - source: آرَام
150
+ expected: Ārām
151
+
152
+ - source: اِئنلِي
153
+ expected: Enlī
154
+
155
+ - source: دَلِيل
156
+ expected: Dalīl
157
+
158
+ - source: قَارَانلِيق
159
+ expected: Qārānlīq
160
+
161
+ - source: اِيش
162
+ expected: Īş
163
+
164
+ - source: اِيشِيق
165
+ expected: Īşīq
166
+
167
+ - source: اُون
168
+ expected: 'On'
169
+
170
+ - source: ُاون
171
+ expected: Ūn
172
+
173
+ - source: ُاؤن
174
+ expected: Ön
175
+
176
+ # - source: ُأوزُوم
177
+ # expected: üzūm
178
+
179
+ - source: اَيْوَان
180
+ expected: Eyvān
181
+
182
+ - source: اَوحَدِي
183
+ expected: Awḥadī
184
+
185
+ - source: َاوَّل
186
+ expected: Avval
187
+
188
+ - source: طَهي
189
+ expected: Ṭahy
190
+
191
+ # From Rule 1 - part a
192
+
193
+ - source: ُأوزدَة
194
+ expected: Üzdah
195
+
196
+ # From Rule 1 - part b
197
+
198
+ - source: مَسئَلَة
199
+ expected: Mas’alah
200
+
201
+ - source: گِئجَة
202
+ expected: Gecah
203
+
204
+ - source: نِئچَة
205
+ expected: Neçah
206
+
207
+ # From Rule 2 - part a
208
+ - source: آدَام
209
+ expected: Ādām
210
+
211
+ # From Rule 2 - part b
212
+ - source: حَيْدَرآبَاد
213
+ expected: Ḥeydar’ābād
214
+
215
+ # From Rule 3
216
+ - source: سَاقَّال
217
+ expected: Sāqqāl
218
+
219
+ - source: مَدَنِيَّت
220
+ expected: Madanīyat
221
+
222
+ # From Rule 5
223
+
224
+ - source: تَكمِلَة الأَخبَار
225
+ expected: Takmilat al-Axbār
226
+
227
+
228
+ map:
229
+ postrules:
230
+ - pattern: (?<=\b)(?<!\b[‘|’|'])[\u0061-\uFFFF]
231
+ result: "upcase"
232
+
233
+ - pattern : '\bAl' # الت
234
+ result: 'al'
235
+
236
+ characters:
237
+
238
+ '\u0628\u0651': 'bb' # ب
239
+ '\u067E\u0651': 'pp' # پ
240
+ '\u062A\u0651': 'tt' # ت
241
+ '\u062b\u0651': 's̱s̱' # ث
242
+ '\u062C\u0651': 'cc' # ج
243
+ '\u0686\u0651': 'çç' # چ
244
+ '\u062d\u0651': 'ḥḥ' # ح
245
+ '\u062E\u0651': 'xx' # خ
246
+ '\u062F\u0651': 'dd' # د
247
+ '\u0630\u0651': 'ẕẕ' # ذ
248
+ '\u0631\u0651': 'rr' # ر
249
+ '\u0632\u0651': 'zz' # ز
250
+ '\u0698\u0651': 'jj' # ژ
251
+ '\u0633\u0651': 'ss' # س
252
+ '\u0634\u0651': 'şş' # ش
253
+ '\u0635\u0651': 'ṣṣ' # ص
254
+ '\u0636\u0651': 'z̤z̤' # ض
255
+ '\u0637\u0651': 'ṭṭ' # ط
256
+ '\u0638\u0651': 'ẓẓ' # ظ
257
+ '\u0639\u0651': '‘‘' # ع
258
+ '\u063A\u0651': 'ğğ' # غ
259
+ '\u0641\u0651': 'ff' # ف with shadda (U+0641 ARABIC LETTER FEH; U+0341 was a combining tone mark and never matched)
260
+ '\u0642\u0651': 'qq' # ق
261
+ '\u06A9\u0651': 'kk' # ك
262
+ '\u0643\u0651': 'kk' # ك
263
+ '\u06AF\u0651': 'gg' # گ
264
+ '\u0644\u0651': 'll' # ل
265
+ '\u0645\u0651': 'mm' # م
266
+ '\u0646\u0651': 'nn' # ن
267
+ '\u0648\u0651': 'vv' # و
268
+ '\u0647\u0651': 'hh' # ه
269
+ '\u064A\u0651': 'yy' # ي
270
+
271
+ '\u060c': ',' # ،
272
+
273
+ '\u0627\u0644\u0644\u0651\u064e\u0647': "Allāh"
274
+
275
+ '\b\u0627\u0644' : 'al-' # ال
276
+
277
+ '\u0628': 'b' # ب
278
+ '\u067E': 'p' # پ
279
+ '\u062A': 't' # ت
280
+ '\u062b': 's̱' # ث
281
+ '\u062C': 'c' # ج
282
+ '\u0686': 'ç' # چ
283
+ '\u062d': 'ḥ' # ح
284
+ '\u062E': 'x' # خ
285
+ '\u062F': 'd' # د
286
+ '\u0630': 'ẕ' # ذ
287
+ '\u0631': 'r' # ر
288
+ '\u0632': 'z' # ز
289
+ '\u0698': 'j' # ژ
290
+ '\u0633': 's' # س
291
+ '\u0634': 'ş' # ش
292
+ '\u0635': 'ṣ' # ص
293
+ '\u0636': 'z̤' # ض
294
+ '\u0637': 'ṭ' # ط
295
+ '\u0638': 'ẓ' # ظ
296
+ '\u0639': '‘' # ع
297
+ '\u063A': 'ğ' # غ
298
+ '\u0641': 'f' # ف (U+0641 ARABIC LETTER FEH; U+0341 was a combining tone mark and never matched)
299
+ '\u0642': 'q' # ق
300
+ '\u06A9': 'k' # ك
301
+ '\u0643': 'k' # ك
302
+ '\u06AF': 'g' # گ
303
+ '\u0644': 'l' # ل
304
+ '\u0645': 'm' # م
305
+ '\u0646': 'n' # ن
306
+ '\u0648': 'v' # و
307
+ '\u0647': 'h' # ه
308
+ '\u064A': 'y' # ي
309
+
310
+ # Vowels and Diphthongs
311
+ '\u064e': 'a'
312
+ '\u0650': 'i'
313
+ '\u064f': 'u'
314
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
315
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
316
+ '\u0622' : 'ā' # آ
317
+ '\u0650[\u0621|\u0623|\u0624|\u0626]' : 'e' # ـِأ kasra followed by hamza
318
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
319
+ '\u0650\u0627\u064a' : 'ī' # ـِاي kasra followed by ا then ي
320
+ '[\u064f]?\u0627\u064f\u0648' : 'o' # ـُاُو
321
+ '\u064f\u0627\u0648' : 'ū' # ـُاو
322
+ '\u064f\u0627\u0624' : 'ö' # ـُاؤ
323
+ '\u064f\u0623\u0648' : 'ü' # ـُأو
324
+ '\u064e\u064a\u0652' : 'ey' # ـَيْ
325
+ '\u064e\u0648\u0652?' : 'aw' # ـَوْ
326
+ '\b\u0627' : '' # ا
327
+ '\b\u064e\u0627': 'a' # ـَا
328
+ '\b\u0650\u0627': 'i' # ـِا
329
+
330
+ # hamzah
331
+ '\b\u0623' : '' # أ
332
+ '\u0623' : '’' # أ
333
+ '\u0624': '’' # ؤ
334
+ '\u0626' : "’" # ئ
335
+ '\b\u0622' : 'ā' # آ
336
+ '(?<!\b\u0627\u0644)(?<!\b)\u0622(?![\b|\u0621])' : '’ā' # آ in middle, not final, or initial, or after ال
337
+ '\u064e\u0622' : 'ā' # ـَآ fatha followed by آ
338
+ '\u0622' : '' # آ
339
+
340
+ # Rule 3 - shadda
341
+ '\u0650\u064a\u0651' : 'īy' # ـِيَّ
342
+ '\u064f\u0648\u0651' : 'ūw' # ـَوّ damma followed by و with shadda
343
+ '\u0650\u064a\u0651\b' : 'ī' # ـِيَّ
344
+ '\u064e\u0648\u0651' : 'aww' # ـَوّ fatha followed by و with shadda
345
+ '\u064e\u064a\u0651' : 'ayy' # ـَيّ fatha followed by ي with shadda
346
+
347
+ # Rule 4 - tanvin
348
+ '\u064c': 'un' # ٌ
349
+ '\u064b': 'an' # ً
350
+ '\u064d': 'in' # ٍ
351
+ # tanween should be on the letter preceding the end in case of ا, ى
352
+ # however, it's common that people mistake that, so we're handling both orders
353
+ '\u064b\u0649': 'an' # ً
354
+ '\u064b\u0627': 'an' # ً
355
+ '\u0649\u064b': 'an' # ً
356
+ '\u0627\u064b': 'an' # ً
357
+
358
+ # Rule 5 ta' marboota
359
+ '\u0629' : 't' # ة in the middle of the sentence
360
+ '\u0629$' : 'h'
361
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'h'
362
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'h'
363
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'h'
364
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'h'
365
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'h'
366
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'h'
367
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'h'
368
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'h'
369
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'h'
370
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'h'
371
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'h'
372
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'h'
373
+
374
+
375
+ '\u0650\u064a(?=\u064e|\u064f)' : 'iy' # ـِي kasra followed by ي, only when a fatha (U+064E) or damma (U+064F) follows
376
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي