interscript-maps 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (291) hide show
  1. checksums.yaml +7 -0
  2. data/README.adoc +28 -0
  3. data/interscript-maps.gemspec +28 -0
  4. data/interscript-maps.yaml +235 -0
  5. data/libs/posix.iml +11 -0
  6. data/libs/unicode.iml +13 -0
  7. data/libs/var-Cyrl.iml +7 -0
  8. data/libs/var-kor.iml +17 -0
  9. data/maps-staging/royin-tha-Thai-Latn-1939-generic.imp +98 -0
  10. data/maps-staging/royin-tha-Thai-Latn-1968.imp +156 -0
  11. data/maps-staging/royin-tha-Thai-Latn-1999-chained.imp +161 -0
  12. data/maps-staging/royin-tha-Thai-Latn-1999.imp +78 -0
  13. data/maps-staging/var-tha-Thai-Thai-phonemic.imp +53 -0
  14. data/maps-staging/var-tha-Thai-Zsym-ipa.imp +273 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.imp +27515 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.imp +392 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.imp +85 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.imp +1171 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.imp +214 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.imp +53 -0
  21. data/maps/alalc-aze-Arab-Latn-1997.imp +321 -0
  22. data/maps/alalc-aze-Cyrl-Latn-1997.imp +101 -0
  23. data/maps/alalc-bel-Cyrl-Latn-1997.imp +118 -0
  24. data/maps/alalc-ben-Beng-Latn-1997.imp +225 -0
  25. data/maps/alalc-ben-Beng-Latn-2017.imp +135 -0
  26. data/maps/alalc-bul-Cyrl-Latn-1997.imp +110 -0
  27. data/maps/alalc-div-Thaa-Latn-1997.imp +171 -0
  28. data/maps/alalc-ell-Grek-Latn-1997.imp +381 -0
  29. data/maps/alalc-ell-Grek-Latn-2010.imp +382 -0
  30. data/maps/alalc-guj-Gujr-Latn-1997.imp +223 -0
  31. data/maps/alalc-guj-Gujr-Latn-2011.imp +57 -0
  32. data/maps/alalc-hin-Deva-Latn-1997.imp +248 -0
  33. data/maps/alalc-hin-Deva-Latn-2011.imp +63 -0
  34. data/maps/alalc-kan-Kana-Latn-1997.imp +233 -0
  35. data/maps/alalc-kan-Kana-Latn-2011.imp +58 -0
  36. data/maps/alalc-kat-Geok-Latn-1997.imp +109 -0
  37. data/maps/alalc-kat-Geor-Latn-1997.imp +104 -0
  38. data/maps/alalc-kor-Hang-Latn-1997.imp +68 -0
  39. data/maps/alalc-mal-Mlym-Latn-1997.imp +260 -0
  40. data/maps/alalc-mal-Mlym-Latn-2012.imp +65 -0
  41. data/maps/alalc-mar-Deva-Latn-1997.imp +178 -0
  42. data/maps/alalc-mar-Deva-Latn-2011.imp +51 -0
  43. data/maps/alalc-mkd-Cyrl-Latn-1997.imp +125 -0
  44. data/maps/alalc-mkd-Cyrl-Latn-2013.imp +113 -0
  45. data/maps/alalc-mon-Cyrl-Latn-1997.imp +161 -0
  46. data/maps/alalc-ori-Orya-Latn-1997.imp +234 -0
  47. data/maps/alalc-ori-Orya-Latn-2011.imp +59 -0
  48. data/maps/alalc-pan-Guru-Latn-1997.imp +241 -0
  49. data/maps/alalc-pan-Guru-Latn-2011.imp +71 -0
  50. data/maps/alalc-per-Arab-Latn-1997.imp +318 -0
  51. data/maps/alalc-pli-Deva-Latn-2012.imp +140 -0
  52. data/maps/alalc-pra-Deva-Latn-2012.imp +52 -0
  53. data/maps/alalc-rus-Cyrl-Latn-1997.imp +165 -0
  54. data/maps/alalc-rus-Cyrl-Latn-2012.imp +107 -0
  55. data/maps/alalc-san-Deva-Latn-2012.imp +207 -0
  56. data/maps/alalc-sin-Sinh-Latn-1997.imp +246 -0
  57. data/maps/alalc-sin-Sinh-Latn-2011.imp +63 -0
  58. data/maps/alalc-srp-Cyrl-Latn-1997.imp +124 -0
  59. data/maps/alalc-srp-Cyrl-Latn-2013.imp +115 -0
  60. data/maps/alalc-tam-Taml-Latn-1997.imp +52 -0
  61. data/maps/alalc-tam-Taml-Latn-2011.imp +49 -0
  62. data/maps/alalc-tel-Telu-Latn-1997.imp +237 -0
  63. data/maps/alalc-tel-Telu-Latn-2011.imp +58 -0
  64. data/maps/alalc-ukr-Cyrl-Latn-1997.imp +123 -0
  65. data/maps/alalc-ukr-Cyrl-Latn-2011.imp +32 -0
  66. data/maps/apcbg-bul-Cyrl-Latn-1995.imp +194 -0
  67. data/maps/az-aze-Cyrl-Latn-1939.imp +105 -0
  68. data/maps/az-aze-Cyrl-Latn-1958.imp +50 -0
  69. data/maps/bas-rus-Cyrl-Latn-2017-bss.imp +160 -0
  70. data/maps/bas-rus-Cyrl-Latn-2017-oss.imp +165 -0
  71. data/maps/bgn-jpn-Hrkt-Latn-1962.imp +288 -0
  72. data/maps/bgn-kor-Hang-Latn-1943.imp +31 -0
  73. data/maps/bgn-kor-Kore-Latn-1943.imp +33 -0
  74. data/maps/bgna-bul-Cyrl-Latn-2006.imp +119 -0
  75. data/maps/bgna-bul-Cyrl-Latn-2009.imp +119 -0
  76. data/maps/bgnpcgn-amh-Ethi-Latn-1967.imp +393 -0
  77. data/maps/bgnpcgn-ara-Arab-Latn-1956.imp +472 -0
  78. data/maps/bgnpcgn-arm-Armn-Latn-1981.imp +125 -0
  79. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.imp +111 -0
  80. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.imp +169 -0
  81. data/maps/bgnpcgn-bal-Arab-Latn-2008.imp +296 -0
  82. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.imp +200 -0
  83. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.imp +137 -0
  84. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.imp +38 -0
  85. data/maps/bgnpcgn-che-Cyrl-Latn-2008.imp +176 -0
  86. data/maps/bgnpcgn-deu-Latn-Latn-2000.imp +56 -0
  87. data/maps/bgnpcgn-div-Thaa-Latn-1972.imp +90 -0
  88. data/maps/bgnpcgn-div-Thaa-Latn-1988.imp +71 -0
  89. data/maps/bgnpcgn-ell-Grek-Latn-1962.imp +443 -0
  90. data/maps/bgnpcgn-ell-Grek-Latn-1996.imp +269 -0
  91. data/maps/bgnpcgn-fao-Latn-Latn-1964.imp +41 -0
  92. data/maps/bgnpcgn-fao-Latn-Latn-1968.imp +28 -0
  93. data/maps/bgnpcgn-fas-Arab-Latn-1956.imp +111 -0
  94. data/maps/bgnpcgn-isl-Latn-Latn-1964.imp +42 -0
  95. data/maps/bgnpcgn-isl-Latn-Latn-1968.imp +32 -0
  96. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.imp +191 -0
  97. data/maps/bgnpcgn-kat-Geor-Latn-1981.imp +116 -0
  98. data/maps/bgnpcgn-kat-Geor-Latn-2009.imp +43 -0
  99. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.imp +193 -0
  100. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.imp +170 -0
  101. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.imp +177 -0
  102. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.imp +40 -0
  103. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.imp +41 -0
  104. data/maps/bgnpcgn-kur-Arab-Latn-2007.imp +240 -0
  105. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.imp +132 -0
  106. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.imp +174 -0
  107. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.imp +168 -0
  108. data/maps/bgnpcgn-nep-Deva-Latn-2011.imp +208 -0
  109. data/maps/bgnpcgn-per-Arab-Latn-1958.imp +312 -0
  110. data/maps/bgnpcgn-prs-Arab-Latn-2007.imp +552 -0
  111. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.imp +445 -0
  112. data/maps/bgnpcgn-pus-Arab-Latn-1968.imp +289 -0
  113. data/maps/bgnpcgn-ron-cyrl-latn-2002.imp +165 -0
  114. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.imp +133 -0
  115. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.imp +195 -0
  116. data/maps/bgnpcgn-sme-Latn-Latn-1984.imp +48 -0
  117. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.imp +55 -0
  118. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.imp +146 -0
  119. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.imp +185 -0
  120. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.imp +188 -0
  121. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.imp +136 -0
  122. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.imp +88 -0
  123. data/maps/bgnpcgn-urd-Arab-Latn-2007.imp +333 -0
  124. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.imp +145 -0
  125. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.imp +74 -0
  126. data/maps/bgnpcgn-zho-Hans-Latn-1979.imp +7463 -0
  127. data/maps/bis-asm-Beng-Latn-13194-1991.imp +154 -0
  128. data/maps/bis-ben-Beng-Latn-13194-1991.imp +151 -0
  129. data/maps/bis-dev-Deva-Latn-13194-1991.imp +178 -0
  130. data/maps/bis-guj-Gujr-Latn-13194-1991.imp +172 -0
  131. data/maps/bis-kan-Kana-Latn-13194-1991.imp +166 -0
  132. data/maps/bis-mlm-Mlym-Latn-13194-1991.imp +170 -0
  133. data/maps/bis-ori-Orya-Latn-13194-1991.imp +168 -0
  134. data/maps/bis-pnj-Guru-Latn-13194-1991.imp +169 -0
  135. data/maps/bis-tel-Telu-Latn-13194-1991.imp +165 -0
  136. data/maps/bis-tml-Taml-Latn-13194-1991.imp +149 -0
  137. data/maps/by-bel-Cyrl-Latn-1998.imp +123 -0
  138. data/maps/by-bel-Cyrl-Latn-2007.imp +77 -0
  139. data/maps/din-grc-Grek-Latn-31634-2011-t1.imp +627 -0
  140. data/maps/din-hin-Deva-Latn-33904-2018.imp +101 -0
  141. data/maps/din-kat-Geor-Latn-32707-2010.imp +103 -0
  142. data/maps/din-mar-Deva-Latn-33904-2018.imp +83 -0
  143. data/maps/din-nep-Deva-Latn-33904-2018.imp +110 -0
  144. data/maps/din-pli-Deva-Latn-33904-2018.imp +72 -0
  145. data/maps/din-pra-Deva-Latn-33904-2018.imp +66 -0
  146. data/maps/din-san-Deva-Latn-33904-2018.imp +294 -0
  147. data/maps/din-tam-Taml-Latn-33903-2016.imp +187 -0
  148. data/maps/dos-nep-Deva-Latn-1997.imp +47 -0
  149. data/maps/elot-ell-Grek-Latn-743-1982-tl.imp +399 -0
  150. data/maps/elot-ell-Grek-Latn-743-1982-ts.imp +397 -0
  151. data/maps/elot-ell-Grek-Latn-743-2001-tl.imp +34 -0
  152. data/maps/elot-ell-Grek-Latn-743-2001-ts.imp +178 -0
  153. data/maps/ggg-kat-Geor-Latn-2002.imp +75 -0
  154. data/maps/gki-bel-Cyrl-Latn-1992.imp +44 -0
  155. data/maps/gki-bel-Cyrl-Latn-2000.imp +159 -0
  156. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.imp +179 -0
  157. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.imp +132 -0
  158. data/maps/hk-yue-Hani-Latn-1888.imp +29201 -0
  159. data/maps/icao-bel-Cyrl-Latn-9303.imp +136 -0
  160. data/maps/icao-bul-Cyrl-Latn-9303.imp +127 -0
  161. data/maps/icao-fas-Arab-Latn-9303.imp +112 -0
  162. data/maps/icao-heb-Hebr-Latn-9303.imp +160 -0
  163. data/maps/icao-mkd-Cyrl-Latn-9303.imp +126 -0
  164. data/maps/icao-rus-Cyrl-Latn-9303.imp +126 -0
  165. data/maps/icao-srp-Cyrl-Latn-9303.imp +126 -0
  166. data/maps/icao-ukr-Cyrl-Latn-9303.imp +127 -0
  167. data/maps/iso-ara-Arab-Latn-233-1984.imp +301 -0
  168. data/maps/iso-asm-Beng-Latn-15919-2001.imp +73 -0
  169. data/maps/iso-ben-Beng-Latn-15919-2001.imp +171 -0
  170. data/maps/iso-ell-Grek-Latn-843-1997-t1.imp +365 -0
  171. data/maps/iso-ell-Grek-Latn-843-1997-t2.imp +43 -0
  172. data/maps/iso-guj-Gujr-Latn-15919-2001.imp +214 -0
  173. data/maps/iso-hin-Deva-Latn-15919-2001.imp +73 -0
  174. data/maps/iso-inc-Deva-Latn-15919-2001.imp +61 -0
  175. data/maps/iso-jpn-Hrkt-Latn-3602-1989.imp +59 -0
  176. data/maps/iso-kan-Kana-Latn-15919-2001.imp +212 -0
  177. data/maps/iso-kat-Geor-Latn-9984-1996.imp +103 -0
  178. data/maps/iso-kor-Hang-Latn-1996-method1.imp +140 -0
  179. data/maps/iso-kor-Hang-Latn-1996-method2.imp +132 -0
  180. data/maps/iso-mal-Mlym-Latn-15919-2001.imp +276 -0
  181. data/maps/iso-mar-Deva-Latn-15919-2001.imp +68 -0
  182. data/maps/iso-nep-Deva-Latn-15919-2001.imp +75 -0
  183. data/maps/iso-ori-Orya-Latn-15919-2001.imp +188 -0
  184. data/maps/iso-pan-Guru-Latn-15919-2001.imp +217 -0
  185. data/maps/iso-pli-Beng-Latn-15919-2001.imp +66 -0
  186. data/maps/iso-pli-Deva-Latn-15919-2001.imp +68 -0
  187. data/maps/iso-pli-Sinh-Latn-15919-2001.imp +211 -0
  188. data/maps/iso-pli-Thai-Latn-15919-2001.imp +47 -0
  189. data/maps/iso-pra-Deva-Latn-15919-2001.imp +60 -0
  190. data/maps/iso-prs-Arab-Latn-233-3-1999.imp +352 -0
  191. data/maps/iso-rus-Cyrl-Latn-9-1995.imp +279 -0
  192. data/maps/iso-san-Deva-Latn-15919-2001.imp +215 -0
  193. data/maps/iso-tam-Taml-Latn-15919-2001.imp +153 -0
  194. data/maps/iso-tel-Telu-Latn-15919-2001.imp +214 -0
  195. data/maps/iso-tha-Thai-Latn-11940-1998.imp +114 -0
  196. data/maps/kp-kor-Hang-Latn-2002.imp +540 -0
  197. data/maps/lshk-yue-Hani-Latn-jyutping-1993.imp +29005 -0
  198. data/maps/masm-mon-Cyrl-Latn-5217-2012.imp +136 -0
  199. data/maps/masm-mon-Latn-Cyrl-5217-2012.imp +162 -0
  200. data/maps/mext-jpn-Hrkt-Latn-1954.imp +403 -0
  201. data/maps/moct-kor-Hang-Latn-2000.imp +475 -0
  202. data/maps/mofa-jpn-Hrkt-Latn-1989.imp +484 -0
  203. data/maps/mv-div-Thaa-Latn-1987.imp +144 -0
  204. data/maps/mvd-bel-Cyrl-Latn-2008.imp +224 -0
  205. data/maps/mvd-bel-Cyrl-Latn-2010.imp +64 -0
  206. data/maps/mvd-rus-Cyrl-Latn-2008.imp +110 -0
  207. data/maps/mvd-rus-Cyrl-Latn-2010.imp +40 -0
  208. data/maps/odni-ara-Arab-Latn-2004.imp +106 -0
  209. data/maps/odni-ara-Arab-Latn-2015.imp +281 -0
  210. data/maps/odni-aze-Cyrl-Latn-2015.imp +158 -0
  211. data/maps/odni-bel-Cyrl-Latn-2015.imp +138 -0
  212. data/maps/odni-bul-Cyrl-Latn-2005.imp +90 -0
  213. data/maps/odni-bul-Cyrl-Latn-2015.imp +103 -0
  214. data/maps/odni-che-Cyrl-Latn-2015.imp +165 -0
  215. data/maps/odni-fas-Arab-Latn-2004.imp +268 -0
  216. data/maps/odni-fas-Arab-Latn-2015.imp +398 -0
  217. data/maps/odni-hin-Deva-Latn-2004.imp +180 -0
  218. data/maps/odni-hin-Deva-Latn-2015.imp +256 -0
  219. data/maps/odni-kat-Geor-Latn-2015.imp +76 -0
  220. data/maps/odni-kaz-Cyrl-Latn-2015.imp +164 -0
  221. data/maps/odni-kir-Cyrl-Latn-2015.imp +149 -0
  222. data/maps/odni-kor-Hang-Latn-2015.imp +307 -0
  223. data/maps/odni-mkd-Cyrl-Latn-2005.imp +28 -0
  224. data/maps/odni-mkd-Cyrl-Latn-2015.imp +124 -0
  225. data/maps/odni-prs-Arab-Latn-2004.imp +120 -0
  226. data/maps/odni-prs-Arab-Latn-2015.imp +225 -0
  227. data/maps/odni-pus-Arab-Latn-2011.imp +327 -0
  228. data/maps/odni-rus-Cyrl-Latn-2015.imp +79 -0
  229. data/maps/odni-srp-Cyrl-Latn-2005.imp +35 -0
  230. data/maps/odni-srp-Cyrl-Latn-2015.imp +130 -0
  231. data/maps/odni-tat-Cyrl-Latn-2015.imp +157 -0
  232. data/maps/odni-tgk-Cyrl-Latn-2015.imp +161 -0
  233. data/maps/odni-tuk-Cyrl-Latn-2015.imp +159 -0
  234. data/maps/odni-uig-Cyrl-Latn-2015.imp +151 -0
  235. data/maps/odni-ukr-Cyrl-Latn-2015.imp +136 -0
  236. data/maps/odni-urd-Arab-Latn-2015.imp +220 -0
  237. data/maps/odni-uzb-Cyrl-Latn-2015.imp +165 -0
  238. data/maps/sac-zho-Hans-Latn-1979.imp +20940 -0
  239. data/maps/sasm-mon-Mong-Latn-general-1978.imp +294 -0
  240. data/maps/sasm-mon-Mong-Latn-phonetic-1978.imp +261 -0
  241. data/maps/ses-ara-Arab-Latn-1930.imp +225 -0
  242. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.imp +171 -0
  243. data/maps/ua-ukr-Cyrl-Latn-1996.imp +149 -0
  244. data/maps/ua-ukr-Cyrl-Latn-2007.imp +69 -0
  245. data/maps/ua-ukr-Cyrl-Latn-2010.imp +128 -0
  246. data/maps/un-amh-Ethi-Latn-2016.imp +483 -0
  247. data/maps/un-ara-Arab-Latn-1971.imp +137 -0
  248. data/maps/un-ara-Arab-Latn-1972.imp +155 -0
  249. data/maps/un-ara-Arab-Latn-2017.imp +375 -0
  250. data/maps/un-asm-Beng-Latn-1972.imp +188 -0
  251. data/maps/un-bel-Cyrl-Latn-2007.imp +78 -0
  252. data/maps/un-ben-Beng-Latn-2016.imp +516 -0
  253. data/maps/un-ell-Grek-Latn-1987-phonetic.imp +437 -0
  254. data/maps/un-ell-Grek-Latn-1987-tl.imp +27 -0
  255. data/maps/un-ell-Grek-Latn-1987-ts.imp +269 -0
  256. data/maps/un-guj-Gujr-Latn-1972.imp +196 -0
  257. data/maps/un-hin-Deva-Latn-2016.imp +356 -0
  258. data/maps/un-kan-Kana-Latn-2016.imp +214 -0
  259. data/maps/un-mal-Mlym-Latn-1972.imp +215 -0
  260. data/maps/un-mar-Deva-Latn-2016.imp +96 -0
  261. data/maps/un-mon-Mong-Latn-general-2013.imp +170 -0
  262. data/maps/un-mon-Mong-Latn-phonetic-2013.imp +170 -0
  263. data/maps/un-nep-Deva-Latn-1972.imp +295 -0
  264. data/maps/un-nep-Deva-Latn-2013.imp +62 -0
  265. data/maps/un-ori-Orya-Latn-1972.imp +208 -0
  266. data/maps/un-pan-Guru-Latn-1972.imp +321 -0
  267. data/maps/un-prs-Arab-Latn-1967.imp +214 -0
  268. data/maps/un-rus-Cyrl-Latn-1987.imp +96 -0
  269. data/maps/un-sin-Sinh-Latn-1972.imp +193 -0
  270. data/maps/un-tam-Taml-Latn-1972.imp +173 -0
  271. data/maps/un-tel-Telu-Latn-1972.imp +229 -0
  272. data/maps/un-ukr-Cyrl-Latn-1998.imp +58 -0
  273. data/maps/un-ukr-Cyrl-Latn-2012.imp +95 -0
  274. data/maps/un-urd-Arab-Latn-1972.imp +290 -0
  275. data/maps/var-amh-Ethi-Latn-eae-2003.imp +414 -0
  276. data/maps/var-gez-Ethi-Latn-eae-2003.imp +54 -0
  277. data/maps/var-hin-Deva-Latn-hunterian-1872.imp +212 -0
  278. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.imp +399 -0
  279. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.imp +382 -0
  280. data/maps/var-kor-Hang-Hang-jamo.imp +11196 -0
  281. data/maps/var-kor-Hang-Latn-mr-1939.imp +574 -0
  282. data/maps/var-kor-Kore-Hang-2013.imp +59764 -0
  283. data/maps/var-kor-Kore-Latn-mr-1939.imp +36 -0
  284. data/maps/var-mar-Deva-Latn-hunterian-1872.imp +39 -0
  285. data/maps/var-mon-Mong-Latn-1930.imp +101 -0
  286. data/maps/var-mon-Mong-Latn-lessing.imp +181 -0
  287. data/maps/var-mon-Mong-Latn-vpmc.imp +182 -0
  288. data/maps/var-pra-Deva-Latn-iast-1912.imp +36 -0
  289. data/maps/var-san-Deva-Latn-iast-1912.imp +147 -0
  290. data/maps/var-zho-Hani-Latn-wd-1979.imp +27549 -0
  291. metadata +335 -0
@@ -0,0 +1,1171 @@
1
+ metadata {
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Arabic (1997)
8
+ url: http://catdir.loc.gov/catdir/cpso/romanization/arabic.pdf
9
+ creation_date: 1997
10
+ description: |
11
+ ALA-LC Romanization table for Arabic
12
+
13
+ notes:
14
+ - For the use of alif to support hamzah, see rule 2. For the romanization of hamzah by the consonantal sign ’ (alif), see rule 8(a). For other orthographic uses of alif see rules 3-5.
15
+
16
+ - The Maghribī variations ڢ and ڧ are romanized f and q respectively.
17
+
18
+ - ة in a word in the construct state is romanized t. See rule 7(b).
19
+
20
+ # Arabic Letters Romanized in Different Ways Depending on Their Context
21
+ - |
22
+ Rule 1 As indicated in the table, ﻭ and ي may represent:
23
+
24
+ (a) The consonants romanized w and y, respectively.
25
+
26
+ waḍ‘ وضع
27
+ ‘iwaḍ عوض
28
+ dalw دلو
29
+ yad يد
30
+ ḥiyal حيل
31
+ ṭahy طهي
32
+
33
+ (b) The long vowels romanized ū, ī, and ā respectively.
34
+
35
+ ūlá أولى
36
+ ṣūrah صورة
37
+ dhū ذو
38
+ īmān إيمان
39
+ jīl جيل
40
+ fī في
41
+ kitāb كتاب
42
+ saḥāb سحاب
43
+ jumān جمان
44
+
45
+ See also rules 11(a) and 11(b)(1-2).
46
+
47
+ (c) The diphthongs romanized aw and ay, respectively.
48
+
49
+ awj أوج
50
+ nawm نوم
51
+ law لو
52
+ aysar أيسر
53
+ shaykh شيخ
54
+ ‘aynay عيني
55
+
56
+ - Rule 2 ا (alif), و and ى when used to support ء (hamzah) are not represented in romanization. See rule 8(a).
57
+
58
+ - Rule 3 ا (alif) when used to support waṣlah ( ٱ ) and maddah ( آ ) is not represented in romanization. See rules 9 and 10.
59
+
60
+ - |
61
+ Rule 4 ا (alif) and و when used as orthographic signs without phonetic significance are not represented in romanization.
62
+
63
+ fa‘alū فعلوا
64
+ ulā’ika أولائك
65
+ ūqīyah أوقية
66
+
67
+ See also rule 12 and examples cited in rules 23-26.
68
+
69
+
70
+ - |
71
+ Rule 5 ا (alif) is used to represent the long vowel romanized ā, as indicated in the table.
72
+
73
+ fā‘il فاعل
74
+ riḍā رضا
75
+
76
+ This alif, when medial, is sometimes omitted in Arabic; it is always indicated in romanization. See rule 19.
77
+
78
+ - |
79
+ Rule 6 Final ى appears in the following special cases:
80
+
81
+ (a) As ﻯ َ (alif maqṣūrah) used in place of َا to represent the long vowel romanized ā.
82
+
83
+ ḥattá حتَّى
84
+ maḍá مضَى
85
+ kubrá كبرَى
86
+ Yaḥyá يحيَى
87
+ musammá مسمَّى
88
+ Muṣṭafá مصطفَى
89
+
90
+ (b) As ِ ﻯّ in nouns and adjectives of the form fā‘īl which are derived from defective roots. This ending is romanized ī, not īy, without regard to the presence of ّ (shaddah). See rule 11(b)(2).
91
+
92
+ Raḍī al-Dīn رضي الدين
93
+
94
+ Compare the fa‘īl form of the same root الرضى[without shaddah] al-Raḍī.
95
+
96
+ (c) As ِ ﻯّ in the relative adjective (nisbah). The ending, like (b) above, is romanized ī, not īy.
97
+
98
+ al-Miṣrī المصرِيّ
99
+
100
+ Compare المصرِيّة al-Miṣrīyah and see rule 11(b)(1).
101
+
102
+ - |
103
+ Rule 7 ة (tā’ marbūṭah)
104
+
105
+ (a) When the noun or adjective ending in ة is indefinite, or is preceded by the definite article, ة is romanized h. The ة in such positions is often replaced by ه.
106
+
107
+ ṣalāh صلاة
108
+ al-Risālah al-bahīyah الرسالة البهية
109
+ mir’āh مرآة
110
+ Urjūzah fī al-ṭibb أرجوزة فى الطب
111
+
112
+
113
+ (b) When the word ending in ة is in the construct state [muḍāf wa-muḍāf ilayh], ة is romanized t.
114
+
115
+ Wizārat al-Tarbiyah وزارة التربية
116
+ Mir’āt al-zamān مرآة الزمان
117
+
118
+
119
+ (c) When the word ending in ة is used adverbially, ة (vocalized ةً) is romanized tan. See rule 12(b).
120
+
121
+ - |
122
+ Rule 8 ء (hamzah)
123
+
124
+ (a) In initial position, whether at the beginning of a word, following a prefixed preposition or conjunction, or following the definite article, ء is not represented in romanization. When medial or final, ء is romanized as ’ (alif).
125
+
126
+ asad أسد
127
+ uns أنس
128
+ idhā إذا
129
+ mas’alah مسألة
130
+ mu’tamar مؤتمر
131
+ dā’im دائم
132
+ mala’a ملأ
133
+ khaṭi’a خطئ
134
+
135
+ (b) ء, when replaced by the sign (waṣlah) and then known as hamzat al-waṣl, is not represented in romanization. See rule 9 below.
136
+
137
+ (waṣlah), like initial ء, is not represented in romanization. See also rule 8(b) above. When the alif which supports waṣlah belongs to the article ال, the initial vowel of the article is romanized a. See rule 17(b). In other words, beginning with hamzat al-waṣl, the initial vowel is romanized i.
138
+
139
+ Riḥlat Ibn Jubayr رحلة ٱبن جبير
140
+ al-istidrāk الإستدراك
141
+ kutub iqtanatʹhā كتب ٱقتنتها
142
+ bi-ihtimām ‘Abd al-Majīd باهتمام عبد ٱلمجيد
143
+
144
+ - |
145
+ Rule 9 (waṣlah), like initial ء, is not represented in romanization.
146
+ See also rule 8(b) above. When the alif which supports waṣlah belongs to the article ال, the initial vowel of the article is romanized a.
147
+ See rule 17(b). In other words, beginning with hamzat al-waṣl, the initial vowel is romanized i.
148
+
149
+ Riḥlat Ibn Jubayr رحلة ٱبن جبير
150
+ al-istidrāk الإستدراك
151
+ kutub iqtanatʹhā كتب ٱقتنتها
152
+ bi-ihtimām ‘Abd al-Majīd باهتمام عبد ٱلمجيد
153
+
154
+ - |
155
+ Rule 10 ˜ (maddah)
156
+
157
+ (a) Initial آ is romanized ā.
158
+
159
+ ālah آلة
160
+ Kullīyat al-Ādāb كلية الآداب
161
+
162
+ (b) Medial آ, when it represents the phonetic combination ’ā, is so romanized.
163
+
164
+ ta’ālīf تآليف
165
+ ma’āthir مآثر
166
+
167
+ (c) ˜ is otherwise not represented in romanization.
168
+
169
+ khulafā’ خلفآء
170
+ - |
171
+ Rule 11 ّ (shaddah or tashdīd)
172
+
173
+ (a) Over و
174
+
175
+ (1) ُوّ, representing the combination of long vowel plus consonant, is romanized ūw.
176
+
177
+ ‘adūw عدُوّ
178
+ qūwah قُوّة
179
+
180
+ (2) َوّ, representing the combination of diphthong plus consonant, is romanized aww.
181
+
182
+ Shawwāl شَوّال
183
+ ṣawwara صَوّر
184
+ jaww جوّ
185
+
186
+
187
+ See also rule 1(c).
188
+
189
+ (b) Over ى
190
+
191
+ (1) Medial ِىّ, representing the combination of long vowel plus consonant, is romanized īy.
192
+
193
+ al-Miṣrīyah المصرِيّة
194
+
195
+ See also rule 1(b).
196
+
197
+ (2) Final ِىّ is romanized ī. See rules 6(b) and 6(c).
198
+
199
+ (3) Medial and final َىّ, representing the combination of diphthong plus consonant, is romanized ayy.
200
+
201
+ ayyām أَيّام
202
+ sayyid سَيّد
203
+ Quṣayy قصَيّ
204
+
205
+ See also rule 1(c).
206
+
207
+ (c) Over other letters, ّ is represented in romanization by doubling the letter or digraph concerned.
208
+
209
+ al-Ghazzī الغزّيّ
210
+ al-Kashshāf الكشّاف
211
+
212
+ - |
213
+ Rule 12 Tanwīn may take the written form ٌ, ً (ًا), or ٍ, romanized un, an, and in, respectively. Tanwīn is normally disregarded in romanization, however. It is indicated in the following cases:
214
+
215
+ (a) When it occurs in indefinite nouns derived from defective roots.
216
+
217
+ qāḍin قاضٍ
218
+ ma‘nan معنىً
219
+
220
+ (b) When it indicates the adverbial use of a noun or adjective.
221
+
222
+ ṭab‘an طبعًا
223
+ faj’atan فجأةً
224
+ al-Mushtarik waḍ‘an المشترك وضعاً
225
+ wa-al-muftariq ṣuq‘an والمفترق صقعاً
226
+ - |
227
+ Rule 12 Tanwīn may take the written form ٌ, ً (ًا), or ٍ, romanized un, an, and in, respectively. Tanwīn is normally disregarded in romanization, however. It is indicated in the following cases:
228
+
229
+ (a) When it occurs in indefinite nouns derived from defective roots.
230
+
231
+ qāḍin قاضٍ
232
+ ma‘nan معنىً
233
+
234
+ (b) When it indicates the adverbial use of a noun or adjective.
235
+
236
+ ṭab‘an طبعًا
237
+ faj’atan فجأةً
238
+ al-Mushtarik waḍ‘an المشترك وضعاً
239
+ wa-al-muftariq ṣuq‘an والمفترق صقعاً
240
+
241
+ # Grammatical Structure as It Affects Romanization
242
+ - |
243
+ Rule 13 Final inflections of verbs are retained in romanization, except in pause.
244
+
245
+ man waliya Miṣr من ولي مصر
246
+ ma‘rifat mā yajibu la-hum معرفة ما يجب لهم
247
+ ṣallá Allāh ‘alayhi wa-sallam صلى الله عليه وسلم
248
+ al-Lu’lu’ al-maknūn fī ḥukm اللؤلؤ المكنون فى حكم
249
+ al-ikhbār ‘ammā sa-yakūn الإخبار عما سيكون
250
+
251
+ - |
252
+ Rule 14 Final inflections of nouns and adjectives:
253
+
254
+ (a) Vocalic endings are not represented in romanization, except preceding pronominal suffixes, and except when the text being romanized is in verse.
255
+
256
+ uṣūluhā al-nafsīyah wa-ṭuruq أصولها النفسية وطرق تدريسها
257
+ tadrīsihā
258
+ ilá yawminā hādhā الى يومنا هذا
259
+
260
+ (b) Tanwīn is not represented in romanization, except as specified in rule 12.
261
+
262
+ (c) ة (tā’ marbūṭah) is romanized h or t as specified in rule 7.
263
+
264
+ (d) For the romanization of the relative adjective (nisbah) see rule 6(c).
265
+
266
+ - |
267
+ Rule 15 Pronouns, pronominal suffixes, and demonstratives:
268
+
269
+ (a) Vocalic endings are retained in romanization.
270
+
271
+ anā wa-anta انا وانت
272
+ hādhihi al-ḥāl هذه الحال
273
+ mu’allafātuhu wa-shurūḥuhā مؤلفاته وشروحها
274
+
275
+ (b) At the close of a phrase or sentence, the ending is romanized in its pausal form.
276
+
277
+ ḥayātuhu wa-‘aṣruh حياته وعصره
278
+ Tawfīq al-Ḥakīm, afkāruh, توفيق الحكيم، أفكاره، آثاره
279
+ āthāruh
280
+
281
+ - |
282
+ Rule 16 Prepositions and conjunctions:
283
+
284
+ (a) Final vowels of separable prepositions and conjunctions are retained in romanization.
285
+
286
+ anna أن
287
+ annahu أنه
288
+ bayna yadayhi بين يديه
289
+
290
+ Note the special cases مما mimmā, ممن mimman.
291
+
292
+ (b) Inseparable prepositions, conjunctions, and other prefixes are connected with what follows by a hyphen.
293
+
294
+ bi-hi به
295
+ wa-ma‘ahu ومعه
296
+ lā-silkī لاسلكي
297
+ - |
298
+ Rule 17 The definite article:
299
+
300
+ (a) The romanized form al is connected with the following word by a hyphen.
301
+
302
+ al-kitāb al-thānī الكتاب الثاني
303
+ al-ittiḥād الإتحاد
304
+ al-aṣl الأصل
305
+ al-āthār الآثار
306
+
307
+ (b) When ال is initial in the word, and when it follows an inseparable preposition or conjunction, it is always romanized al regardless of whether the preceding word, as romanized, ends in a vowel or a consonant.
308
+
309
+ ilá al-ān الى الآن
310
+ Abū al-Wafā’ ابو الوفاء
311
+ Maktabat al-Nahḍah al-Miṣrīyah مكتبة النهضة المصرية
312
+ bi-al-tamām wa-al-kamāl بالتمام والكمال
313
+
314
+ Note the exceptional treatment of the preposition ل followed by the article:
315
+
316
+ lil-Shirbīnī للشربيني
317
+
318
+ See also rule 23.
319
+
320
+ (c) The ل of the article is always romanized l, whether it is followed by a “sun letter” or not, i.e., regardless of whether or not it is assimilated in pronunciation to the initial consonant of the word to which it is attached.
321
+
322
+ al-ḥurūf al-abjadīyah الحروف الأبجدية
323
+ Abū al-Layth al-Samarqandī ابو الليث السمرقندي
324
+
325
+ - |
326
+ Rule 18 Capitalization:
327
+
328
+ (a) Rules for the capitalization of English are followed, except that the definite article al is given in lower case in all positions.
329
+
330
+ (b) Diacritics are used with both upper and lower case letters.
331
+
332
+ al-Ījī الايجي
333
+ al-Ālūsī الآلوسي
334
+
335
+ - |
336
+ Rule 19 The macron or the acute accent, as appropriate, is used to indicate all long vowels, including those which in Arabic script are written defectively. The macron or the acute accent, as the case may be, is retained over final long vowels which are shortened in pronunciation before hamzat al-waṣl.
337
+
338
+ Ibrāhīm إبراهيم ، إبرهيم
339
+ Dā’ūd داؤود ، داؤد
340
+ Abū al-Ḥasan ابو الحسن
341
+ ru’ūs رؤوس
342
+ dhālika ذلك
343
+ ‘alá al-‘ayn على العين
344
+
345
+ - |
346
+ Rule 20 The hyphen is used:
347
+
348
+ (a) To connect the definite article al with the word to which it is attached. See rule 17(a).
349
+
350
+ (b) Between an inseparable prefix and what follows. See rules 16(b) and 17(b) above.
351
+
352
+ (c) Between bin and the following element in personal names when they are written in Arabic as a single word. See rule 25.
353
+
354
+ - |
355
+ Rule 21 The prime ( ʹ ) is used:
356
+
357
+ (a) To separate two letters representing two distinct consonantal sounds, when the combination might otherwise be read as a digraph.
358
+
359
+ Adʹham أدهم
360
+ akramatʹhā أكرمتها
361
+
362
+ (b) To mark the use of a letter in its final form when it occurs in the middle of a word.
363
+
364
+ Qal‘ahʹjī قلعه‌جى
365
+ Shaykhʹzādah شيخ زاده
366
+
367
+ - |
368
+ Rule 22 As in the case of romanization from other languages, foreign words which occur in an Arabic context and are written in Arabic letters are romanized according to the rules for romanizing Arabic.
369
+ Jārmānūs (not Germanos nor Germanus) جارمانوس
370
+ Lūrd Ghrānfīl (not Lord Granville) لورد غرانفيل
371
+ Īsāghūjī (not Isagoge) ايساغوجي
372
+
373
+ For short vowels not indicated in the Arabic, the Arabic vowel nearest to the original pronunciation is supplied.
374
+
375
+ Gharsiyā Khayin (not García Jaén) غرسيا خين
376
+
377
+ # Examples of Irregular Arabic Orthography
378
+
379
+ - |
380
+ Rule 23 Note the romanization of الله, alone and in combination.
381
+
382
+ Allāh الله
383
+ billāh
384
+ lillāh
385
+ bismillāh بسم الله
386
+ al-Mustanṣir billāh
387
+
388
+ - |
389
+ Rule 24 Note the romanization of the following personal names:
390
+
391
+ Ṭāhā طه
392
+ Yāsīn يس ، يسن
393
+ ‘Amr عمرو
394
+ Bahjat بهجت ، بهجة
395
+
396
+ - |
397
+ Rule 25 ابن and بن are both romanized ibn in all positions.
398
+
399
+ Aḥmad ibn Muḥammad ibn Abī al-Rabī‘ احمد بن محمد بن ابي الربيع
400
+ Sharḥ Ibn ‘Aqīl ‘alá Alfīyat Ibn Mālik شرح ابن عقيل على الفية ابن مالك
401
+
402
+ Exception is made in the case of modern names, typically North African, in which the element بن is pronounced bin.
403
+
404
+ Bin Khiddah بن خده
405
+ Bin-‘Abd Allāh بنعبد الله
406
+ }
407
+
408
+ tests {
409
+ # From Rule 1 - part a
410
+ test "وَضعْ", "waḍ‘"
411
+ test "عِوَضْ", "‘iwaḍ"
412
+ test "دَلو", "dalw"
413
+ test "يَد", "yad"
414
+ test "حِيَل", "ḥiyal"
415
+ test "طَهي", "ṭahy"
416
+ # From Rule 1 - part b
417
+ test "أُولَى", "ūlá"
418
+ test "صُورَة", "ṣūrah"
419
+ test "ذُو", "dhū"
420
+ test "إيمَان", "īmān"
421
+ test "جِيْل", "jīl"
422
+ test "فِي", "fī"
423
+ test "كِتَاب", "kitāb"
424
+ test "سَحَاب", "saḥāb"
425
+ test "جُمَان", "jumān"
426
+ # From Rule 1 - part c
427
+ test "أوج", "awj"
428
+ test "نَوم", "nawm"
429
+ test "لَو", "law"
430
+ test "أيسَر", "aysar"
431
+ test "شَيخ", "shaykh"
432
+ test "عَينَي", "‘aynay"
433
+ # From Rule 4
434
+ test "فَعَلُوا", "fa‘alū"
435
+ # - source: أُولَائِكَ
436
+ # expected: ulā’ika
437
+ test "أُوقِيَّة", "ūqīyah"
438
+ # From Rule 5
439
+ test "فَاعِل", "fā‘il"
440
+ test "رِضَا", "riḍā"
441
+ # From Rule 6 - part a
442
+ test "حَتَّى", "ḥattá"
443
+ test "مَضَى", "maḍá"
444
+ test "كُبرَى", "kubrá"
445
+ test "يَحيَى", "yaḥyá"
446
+ test "مُسَمَّى", "musammá"
447
+ test "مُصطَفَى", "muṣṭafá"
448
+ # From Rule 6 - part b
449
+ test "رَضِي الدِين", "raḍī al-dīn"
450
+ # From Rule 6 - part c
451
+ test "المِصرِيّ", "al-miṣrī"
452
+ # From Rule 7 - part a
453
+ test "صَلَاة", "ṣalāh"
454
+ test "الرِسَالَة البَهِيَّة", "al-risālah al-bahīyah"
455
+ test "مِرآة", "mir’āh"
456
+ # - source: أرجوزة فى الطب
457
+ # expected: Urjūzah fī al-ṭibb
458
+ # From Rule 7 - part b
459
+ test "وِزَارَة التَربِيَة", "wizārat al-tarbiyah"
460
+ test "مِرآة الزَمَان", "mir’āt al-zamān"
461
+ # From Rule 8 - part a
462
+ test "أَسَد", "asad"
463
+ test "أُنس", "uns"
464
+ test "إذَا", "idhā"
465
+ test "مَسأَلَة", "mas’alah"
466
+ test "مُؤتَمَر", "mu’tamar"
467
+ test "دَائِم", "dā’im"
468
+ test "مَلَأ", "mala’a"
469
+ test "خَطِئ", "khaṭi’a"
470
+ # From Rule 9
471
+ test "رِحلَة إبن جُبَير", "riḥlat ibn jubayr"
472
+ test "الإستِدرَاك", "al-istidrāk"
473
+ # - source: كُتُب إقتَنَتهَا
474
+ # expected: kutub iqtanatʹhā # issue
475
+ # - source: باهتمام عبد ٱلمجيد
476
+ # expected: bi-ihtimām ‘Abd al-Majīd #issue
477
+ # From Rule 10 - part a
478
+ test "آلَة", "ālah"
479
+ test "كُلِّيَّة الآدَاب", "kullīyat al-ādāb"
480
+ # From Rule 10 - part b
481
+ test "تَآلِيف", "ta’ālīf"
482
+ test "مَآثِر", "ma’āthir"
483
+ # From Rule 10 - part c
484
+ test "خُلَفَآء", "khulafā’"
485
+ # From Rule 11 - part a-1
486
+ test "عَدُوّ", "‘adūw"
487
+ test "قُوَّة", "qūwah"
488
+ # From Rule 11 - part a-2
489
+ test "شَوَّال", "shawwāl"
490
+ test "صَوَّرَ", "ṣawwara"
491
+ test "جَوّ", "jaww"
492
+ # From Rule 11 - part b-1
493
+ test "المِصرِيَّة", "al-miṣrīyah"
494
+ # From Rule 11 - part b-3
495
+ test "أَيَّام", "ayyām"
496
+ test "سَيِّد", "sayyid"
497
+ test "قُصَيّ", "quṣayy"
498
+ # From Rule 11 - part c
499
+ test "الغَزِّيّ", "al-ghazzī"
500
+ test "الكَشَّاف", "al-kashshāf"
501
+ # From Rule 12 - part a
502
+ test "قَاضٍ", "qāḍin"
503
+ test "مَعنًى", "ma‘nan"
504
+ # From Rule 12 - part b
505
+ test "طَبعًا", "ṭab‘an"
506
+ test "فَجأَةً", "faj’atan"
507
+ test "المُشتَرِك وَضعاً", "al-mushtarik waḍ‘an"
508
+ # - source: وَالمُفتَرِق صُقعاً #issue
509
+ # expected: wa-al-muftariq ṣuq‘an
510
+ # Grammar
511
+ # From Rule 13
512
+ test "مَن وَلِيَ مِصر", "man waliya miṣr"
513
+ # - source: مَعرِفَة مَا يَجِبُ لَهُم
514
+ # expected: ma‘rifat mā yajibu la-hum
515
+ # - source: صَلَّى اللَّه عَلَيهِ وسَلَّم # issue allah starting with al
516
+ # expected: ṣallá Allāh ‘alayhi wa-sallam
517
+ test "اللُؤلُؤ المَكنُون فِي حُكم", "al-lu’lu’ al-maknūn fī ḥukm"
518
+ # - source: الإخبَار عَمَّا سَيَكُون #issue sa-yakūn
519
+ # expected: al-ikhbār ‘ammā sa-yakūn
520
+ # From Rule 14 - part a
521
+ # - source: أُصُولَهَا النَفسِيَّة وَطُرُق تَدرِيسِهَا # issue wa-ṭuruq
522
+ # expected: uṣūluhā al-nafsīyah wa-ṭuruq tadrīsihā
523
+ test "إلَى يَومِنَا هَذَا", "ilá yawminā hādhā"
524
+ # From Rule 15 - part a
525
+ # - source: انا وانت # issue waw atf
526
+ # expected: anā wa-anta
527
+ test "هَذِهِ الحَال", "hādhihi al-ḥāl"
528
+ # - source: مُؤَلَّفَاتُهُ وَشُرُوحُهَا
529
+ # expected: mu’allafātuhu wa-shurūḥuhā
530
+ # From Rule 15 - part b
531
+ # - source: حياته وعصره
532
+ # expected: ḥayātuhu wa-‘aṣruh
533
+ test "تَوفِيق الحَكِيم، أَفكَارُه، آثَارُه", "tawfīq al-ḥakīm, afkāruh, āthāruh"
534
+ # From Rule 16 - part a
535
+ test "أَنَّ", "anna"
536
+ test "أَنَّهُ", "annahu"
537
+ test "بَينَ يَدَيهِ", "bayna yadayhi"
538
+ # From Rule 16 - part b
539
+ # - source: به
540
+ # expected: bi-hi
541
+ # - source: ومعه
542
+ # expected: wa-ma‘ahu
543
+ # - source: لاسلكي
544
+ # expected: lā-silkī
545
+ # From Rule 17 - part a
546
+ test "الكِتَاب الثَانِي", "al-kitāb al-thānī"
547
+ test "الإتِّحَاد", "al-ittiḥād"
548
+ test "الأَصل", "al-aṣl"
549
+ test "الآثَار", "al-āthār"
550
+ # From Rule 17 - part b
551
+ test "إلَى الآن", "ilá al-ān"
552
+ test "ابُو الوَفَاء", "abū al-wafā’"
553
+ test "مَكتَبَة النَهضَة المِصرِيَّة", "maktabat al-nahḍah al-miṣrīyah"
554
+ # - source: بالتمام والكمال
555
+ # expected: bi-al-tamām wa-al-kamāl
556
+ # - source: للشربيني
557
+ # expected: lil-Shirbīnī
558
+ # From Rule 17 - part c
559
+ test "الحُرُوف الأَبجَدِيَّة", "al-ḥurūf al-abjadīyah"
560
+ test "ابُو اللَيث السَمَرقَندِي", "abū al-layth al-samarqandī"
561
+ # From Rule 18 - part b
562
+ test "الإيجِي", "al-ījī"
563
+ test "الآلُوسِي", "al-ālūsī"
564
+ # From Rule 19
565
+ # - source: إبراهيم ، إبرهيم
566
+ # expected: Ibrāhīm
567
+ # - source: داؤود ، داؤد
568
+ # expected: Dā’ūd
569
+ # - source: ابو الحسن
570
+ # expected: Abū al-Ḥasan
571
+ test "رُؤُوس", "ru’ūs"
572
+ # - source: ذَلِكَ
573
+ # expected: dhālika
574
+ test "عَلَى العَين", "‘alá al-‘ayn"
575
+ # # From Rule 21 - part a
576
+ # - source: أدهم
577
+ # expected: Adʹham
578
+ # - source: أكرمتها
579
+ # expected: akramatʹhā
580
+ # # From Rule 21 - part b
581
+ # - source: قلعه‌جى
582
+ # expected: Qal‘ahʹjī
583
+ # - source: شيخ زاده
584
+ # expected: Shaykhʹzādah
585
+ # From Rule 22
586
+ test "جَارمَانُوس", "jārmānūs" # not Germanos nor Germanus
587
+ test "لُورد غرَانفِيل", "lūrd ghrānfīl" # not Lord Granville
588
+ test "إيسَاغُوجِي", "īsāghūjī" # not Isagoge
589
+ # - source: غرسيا خين
590
+ # expected: Gharsiyā Khayin # not García Jaén
591
+ # From Rule 23
592
+ test "اللَّه", "Allāh"
593
+ # - source: بسم الله
594
+ # expected: bismillāh
595
+ # # From Rule 24
596
+ # - source: طه
597
+ # expected: Ṭāhā
598
+ # - source: يس ، يسن
599
+ # expected: Yāsīn
600
+ # - source: عمرو
601
+ # expected: ‘Amr
602
+ # - source: بهجت ، بهجة
603
+ # expected: Bahjat
604
+ # # From Rule 25
605
+ # - source: احمد بن محمد بن ابي الربيع
606
+ # expected: Aḥmad ibn Muḥammad ibn Abī al-Rabī‘
607
+ # - source: شرح ابن عقيل على الفية ابن مالك
608
+ # expected: Sharḥ Ibn ‘Aqīl ‘alá Alfīyat Ibn Mālik
609
+ # - source: بن خده
610
+ # expected: Bin Khiddah
611
+ # - source: بنعبد الله
612
+ # expected: Bin-‘Abd Allāh
613
+ }
614
+
615
+ stage {
616
+
617
+ # CHARACTERS
618
+ parallel {
619
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
620
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
621
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
622
+
623
+ sub "\u0650\u064a", "iy", after: any(["\u064e", "u064f"]) # ـِي kasra followed by ي
624
+
625
+
626
+ # pointing
627
+ sub "\u064e", "a" # َ fatha
628
+ # '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
629
+ # '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
630
+ sub "\u0650", "i" # ِ kasra
631
+ sub "\u064f", "u" # ُ damma
632
+ sub "\u0652", "" # ْ sokoon, see note A below
633
+
634
+
635
+ # special pointed letters
636
+ sub "\u0639\u064e", "‘a" # عَ
637
+ sub "\u0639\u0650", "‘i" # عِ
638
+ sub "\u0639\u064f", "‘ū" # عُ
639
+ # handle MacOS regex difference
640
+ sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و
641
+
642
+ sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
643
+ sub "\u064f\u0648", "ū" # ـُو damma followed by و
644
+ sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
645
+ sub "\u064e\u0649", "á" # ـَى fatha followed by ى which is ا not ي
646
+ sub "\u064e\u0648\u0652", "aw" # ـَوْ
647
+ sub "\u064e\u064a\u0652", "ay" # ـَيْ
648
+
649
+
650
+ # ta' marboota
651
+ sub "\u0629", "t" # ة in the middle of the sentence
652
+ sub "\u0629" + line_end, "h"
653
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
654
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
655
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
656
+
657
+
658
+
659
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
660
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
661
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
662
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
663
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
664
+ sub "\u0629", "h", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
665
+ sub "\u0623\u0648", "aw" # أو
666
+ sub "\u0623\u064a", "ay" # أي
667
+
668
+ # Rule 4
669
+ sub "\u064f\u0648\u0627", "ū" # وا
670
+
671
+ # Rule 8
672
+ sub "\u0621", "’"
673
+ sub boundary + "\u0623", "" # أ
674
+ sub "\u0623", "’" # أ
675
+ sub "\u0623" + boundary, "’a" # أ
676
+ sub "\u0623\u064e", 'a', not_after: any("\u0629\u0644")
677
+ sub "\u0624", "’" # ؤ
678
+ sub "\u0625\u064a", "ī" # إي alif with hamza below (U+0625) followed by ي (U+064A)
679
+ sub "\u0625", "i" # إِ
680
+ sub "\u0626", "’" # ئ
681
+ sub "\u0626" + boundary, "’a" # ئ
682
+ sub boundary + "\u0627", "a" # ا
683
+ sub "\u0627", "" # ا
684
+
685
+ # Rule 10
686
+
687
+ sub boundary + "\u0622", "ā" # آ
688
+ sub "\u0622", "’ā", not_before: any([boundary + "\u0627\u0644", boundary]), not_after: any([boundary, "\u0621"]) # آ in middle, not final, or initial, or after ال
689
+ sub "\u064e\u0622", "ā", not_after: any("\u062b\u0644") # ـَآ fatha followed by آ (alif madda, U+0622)
690
+ sub "\u0622", "ā", before: "\u0644" # added after debugging
691
+ sub "\u0622", "" # آ
692
+
693
+ # Rule 11 - shadda
694
+ sub "\u064f\u0648\u0651", "ūw" # ـُوّ damma followed by و with shadda
695
+ sub "\u064e\u0648\u0651", "aww" # ـَوّ fatha followed by و with shadda
696
+ sub "\u064e\u064a\u0651", "ayy" # ـَيّ fatha followed by ي with shadda
697
+ sub "\u0650\u064a\u0651", "īy" # ـِيَّ
698
+ sub "\u0650\u064a\u0651" + boundary, "ī" # ـِيَّ
699
+
700
+ sub "\u0628\u0651", "bb" # ب
701
+ sub "\u062a\u0651", "tt" # ت
702
+ sub "\u062b\u0651", "thth" # ث
703
+ sub "\u062c\u0651", "jj" # ج
704
+ sub "\u062d\u0651", "ḥḥ" # ح
705
+ sub "\u062e\u0651", "khkh" # خ
706
+ sub "\u062f\u0651", "dd" # د
707
+ sub "\u0630\u0651", "dhdh" # ذ
708
+ sub "\u0631\u0651", "rr" # ر
709
+ sub "\u0632\u0651", "zz" # ز
710
+ sub "\u0633\u0651", "ss" # س
711
+ sub "\u0634\u0651", "shsh" # ش
712
+ sub "\u0635\u0651", "ṣṣ" # ص
713
+ sub "\u0636\u0651", "ḍḍ" # ض
714
+ sub "\u0637\u0651", "ṭṭ" # ط
715
+ sub "\u0638\u0651", "ẓẓ" # ظ
716
+ sub "\u063a\u0651", "ghgh" # غ
717
+ sub "\u0641\u0651", "ff" # ف
718
+ sub "\u0642\u0651", "qq" # ق
719
+ sub "\u0643\u0651", "kk" # ك
720
+ sub "\u0644\u0651", "ll" # ل
721
+ sub "\u0645\u0651", "mm" # م
722
+ sub "\u0646\u0651", "nn" # ن
723
+ sub "\u0647\u0651", "hh" # ه
724
+ sub "\u0648\u0651", "ww" # و
725
+ sub "\u064a\u0651", "yy" # ي
726
+
727
+ # Rule 12 - tanwin
728
+
729
+ sub "\u064c", "un" # ٌ
730
+ sub "\u064b", "an" # ً
731
+ sub "\u064d", "in" # ٍ
732
+ # tanween should be on the letter preceding the final ا or ى
733
+ # however, it's common that people mistake that, so we're handling both orders
734
+ sub "\u064b\u0649", "an" # ً
735
+ sub "\u064b\u0627", "an" # ً
736
+ sub "\u0649\u064b", "an" # ً
737
+ sub "\u0627\u064b", "an" # ً
738
+
739
+ # Rule 13
740
+ sub "\u0647\u064e", "hā" # ه
741
+
742
+ sub "\u060c", "," # ،
743
+
744
+ sub "\u0627\u0644\u0644\u0651\u064e\u0647", "Allāh"
745
+
746
+ sub boundary + "\u0627\u0644", "al-" # ال
747
+ # '\uFE8E' : '' # ﺎ
748
+
749
+ sub "\u0628", "b" # ب
750
+ sub "\uFE91", "b" # ﺑ
751
+ sub "\uFE92", "b" # ﺒ
752
+ sub "\uFE90", "b" # ﺐ
753
+
754
+ sub "\u062a", "t" # ت
755
+ sub "\ufe97", "t" # ﺗ
756
+ sub "\ufe98", "t" # ﺘ
757
+ sub "\ufe96", "t" # ﺖ
758
+
759
+ sub "\u062b", "th" # ث
760
+ sub "\ufe9b", "th" # ﺛ
761
+ sub "\ufe9c", "th" # ﺜ
762
+ sub "\ufe9a", "th" # ﺚ
763
+
764
+ sub "\u062c", "j" # ج
765
+ sub "\ufe9f", "j" # ﺟ
766
+ sub "\ufea0", "j" # ﺠ
767
+ sub "\ufe9e", "j" # ﺞ
768
+
769
+ sub "\u062d", "ḥ" # ح
770
+ sub "\ufea3", "ḥ" # ﺣ
771
+ sub "\ufea4", "ḥ" # ﺤ
772
+ sub "\ufea2", "ḥ" # ﺢ
773
+
774
+ sub "\u062e", "kh" # خ
775
+ sub "\ufea7", "kh" # ﺧ
776
+ sub "\ufea8", "kh" # ﺨ
777
+ sub "\ufea6", "kh" # ﺦ
778
+
779
+ sub "\u062f", "d" # د
780
+ sub "\ufeaa", "d" # ﺪ
781
+ # Initial
782
+ sub "\ufea3", "ḥ" # ﺣ
783
+ sub "\ufebb", "ṣ" # ﺻ
784
+ sub "\ufebf", "ḍ" # ﺿ
785
+ sub "\ufec3", "ṭ" # ﻃ
786
+ sub "\ufec7", "ẓ" # ﻇ
787
+
788
+ sub "\u0630", "dh" # ذ
789
+ sub "\ufeac", "dh" # ﺬ
790
+
791
+ sub "\u0631", "r" # ر
792
+ sub "\ufeae", "r" # ﺮ
793
+
794
+ sub "\u0632", "z" # ز
795
+ sub "\ufeb0", "z" # ﺰ
796
+
797
+ sub "\u0633", "s" # س
798
+ sub "\ufeb3", "s" # ﺳ
799
+ sub "\ufeb4", "s" # ﺴ
800
+ sub "\ufeb2", "s" # ﺲ
801
+
802
+ sub "\u0634", "sh" # ش
803
+ sub "\ufeb7", "sh" # ﺷ
804
+ sub "\ufeb8", "sh" # ﺸ
805
+ sub "\ufeb6", "sh" # ﺶ
806
+
807
+ sub "\u0635", "ṣ" # ص
808
+ sub "\ufebb", "ṣ" # ﺻ
809
+ sub "\ufebc", "ṣ" # ﺼ
810
+ sub "\ufeba", "ṣ" # ﺺ
811
+
812
+ sub "\u0636", "ḍ" # ض
813
+ sub "\ufebf", "ḍ" # ﺿ
814
+ sub "\ufec0", "ḍ" # ﻀ
815
+ sub "\ufebe", "ḍ" # ﺾ
816
+
817
+ sub "\u0637", "ṭ" # ط
818
+ sub "\ufec3", "ṭ" # ﻃ
819
+ sub "\ufec4", "ṭ" # ﻄ
820
+ sub "\ufec2", "ṭ" # ﻂ
821
+
822
+ sub "\u0638", "ẓ" # ظ
823
+ sub "\ufec7", "ẓ" # ﻇ
824
+ sub "\ufec8", "ẓ" # ﻈ
825
+ sub "\ufec6", "ẓ" # ﻆ
826
+
827
+ sub "\u0639", "‘" # ع
828
+ sub "\ufecb", "‘" # ﻋ
829
+ sub "\ufecc", "‘" # ﻌ
830
+ sub "\ufeca", "‘" # ﻊ
831
+
832
+ sub "\u063a", "gh" # غ
833
+ sub "\ufecf", "gh" # ﻏ
834
+ sub "\ufed0", "gh" # ﻐ
835
+ sub "\ufece", "gh" # ﻎ
836
+
837
+ sub "\u0641", "f" # ف
838
+ sub "\ufed3", "f" # ﻓ
839
+ sub "\ufed4", "f" # ﻔ
840
+ sub "\ufed2", "f" # ﻒ
841
+
842
+ sub "\u0642", "q" # ق
843
+ sub "\ufed7", "q" # ﻗ
844
+ sub "\ufed8", "q" # ﻘ
845
+ sub "\ufed6", "q" # ﻖ
846
+
847
+ sub "\u0643", "k" # ك
848
+ sub "\ufedb", "k" # ﻛ
849
+ sub "\ufedc", "k" # ﻜ
850
+ sub "\ufeda", "k" # ﻚ
851
+
852
+ sub "\u0644", "l" # ل
853
+ sub "\ufedf", "l" # ﻟ
854
+ sub "\ufee0", "l" # ﻠ
855
+ sub "\ufede", "l" # ﻞ
856
+
857
+ sub "\u0645", "m" # م
858
+ sub "\ufee3", "m" # ﻣ
859
+ sub "\ufee4", "m" # ﻤ
860
+ sub "\ufee2", "m" # ﻢ
861
+
862
+ sub "\u0646", "n" # ن
863
+ sub "\ufee7", "n" # ﻧ
864
+ sub "\ufee8", "n" # ﻨ
865
+ sub "\ufee6", "n" # ﻦ
866
+
867
+ # See note C
868
+ sub "\u0647", "h" # ه
869
+ sub "\ufeeb", "h" # ﻫ
870
+ sub "\ufeec", "h" # ﻬ
871
+ sub "\ufeea", "h" # ﻪ
872
+
873
+ sub "\u0648", "w" # و
874
+ sub "\ufeee", "w" # ﻮ
875
+
876
+ sub "\u064a", "y" # ي
877
+ sub "\ufef3", "y" # ﻳ
878
+ sub "\ufef4", "y" # ﻴ
879
+ sub "\ufef1", "y" # ﻱ
880
+
881
+ # (A) Not romanized word-initially.
882
+
883
+ # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
884
+
885
+ # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṭah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t instead.
886
+
887
+
888
+ # Vowels, diphthongs and diacritical marks
889
+ # (ـ stands for any consonant)
890
+
891
+
892
+ # Vowels and Diphthongs
893
+ sub "\u064e", "a"
894
+ sub "\u064f", "u"
895
+ sub "\u0650", "i"
896
+ sub "\u064e\u0627", "ā" # see Rule 5
897
+ sub "\u064e\u0649", "á" # see Rule 6(a)
898
+ sub "\u064f\u0648", "ū"
899
+ sub "\u0650\u064a", "ī"
900
+ sub "\u064e\u0648\u0652", "aw"
901
+ sub "\u064e\u064a\u0652", "ay"
902
+
903
+ # Letters Representing Non-Arabic Consonants
904
+ # (this list is not exhaustive)
905
+ sub "\u06af", "g" # گ
906
+ sub "\u06b4", "ñ" # ڴ
907
+ sub "\u067e", "p" # پ
908
+ sub "\u0686", any(["ch", "zh"]) # چ
909
+ sub "\u0698", "zh" # ژ
910
+ sub "\u06a4", "v" # ڤ
911
+ sub "\u06cb", "v" # ۋ
912
+ sub "\u06a5", "v" # ڥ
913
+
914
+ # Arabic standard Unicode block
915
+ sub "\u0600", "" # ؀
916
+ sub "\u0601", "" # ؁
917
+ sub "\u0602", "" # ؂
918
+ sub "\u0603", "" # ؃
919
+ sub "\u0604", "" # ؄
920
+ sub "\u0605", "" # ؅
921
+ sub "\u0606", "" # ؆
922
+ sub "\u0607", "" # ؇
923
+ sub "\u0608", "" # ؈
924
+ sub "\u0609", "" # ؉
925
+ sub "\u060a", "" # ؊
926
+ sub "\u060b", "" # ؋
927
+ sub "\u060d", "" # ؍
928
+ sub "\u060e", "" # ؎
929
+ sub "\u060f", "" # ؏
930
+ sub "\u0610", "" # ؐ
931
+ sub "\u0611", "" # ؑ
932
+ sub "\u0612", "" # ؒ
933
+ sub "\u0613", "" # ؓ
934
+ sub "\u0614", "" # ؔ
935
+ sub "\u0615", "" # ؕ
936
+ sub "\u0616", "" # ؖ
937
+ sub "\u0617", "" # ؗ
938
+ sub "\u0618", "" # ؘ
939
+ sub "\u0619", "" # ؙ
940
+ sub "\u061a", "" # ؚ
941
+ sub "\u061b", "" # ؛
942
+ sub "\u061c", "" #
943
+ sub "\u061d", "" #
944
+ sub "\u061e", "" # ؞
945
+ sub "\u061f", "" # ؟
946
+ sub "\u0620", "" # ؠ
947
+ sub "\u0628", "b" # ب
948
+
949
+ # '\u0629': 'h' # ة -- see Note 3
950
+ sub "\u062a", "t" # ت
951
+ sub "\u062b", "th" # ث
952
+ sub "\u062c", "j" # ج
953
+ sub "\u062d", "ḥ" # ح
954
+ sub "\u062e", "kh" # خ
955
+ sub "\u062f", "d" # د
956
+ sub "\u0630", "dh" # ذ
957
+ sub "\u0631", "r" # ر
958
+ sub "\u0632", "z" # ز
959
+ sub "\u0633", "s" # س
960
+ sub "\u0634", "sh" # ش
961
+ sub "\u0635", "ṣ" # ص
962
+ sub "\u0636", "ḍ" # ض
963
+ sub "\u0637", "ṭ" # ط
964
+ sub "\u0638", "ẓ" # ظ
965
+ sub "\u0639", "‘" # ع
966
+ sub "\u063a", "gh" # غ
967
+ sub "\u063b", "" # ػ
968
+ sub "\u063c", "" # ؼ
969
+ sub "\u063d", "" # ؽ
970
+ sub "\u063e", "" # ؾ
971
+ sub "\u063f", "" # ؿ
972
+ sub "\u0640", "" # ـ
973
+ sub "\u0641", "f" # ف -- see Note 2
974
+ sub "\u0642", "q" # ق -- see Note 2
975
+ sub "\u0643", "k" # ك
976
+ sub "\u0644", "l" # ل
977
+ sub "\u0645", "m" # م
978
+ sub "\u0646", "n" # ن
979
+
980
+ # '\u0647': 'h' # ه -- see Note 3
981
+ sub "\u0648", "w" # و
982
+
983
+ # '\u064a': 'y' # ي
984
+ # '\u064e': '' # َ
985
+ # '\u064f': '' # ُ
986
+ # '\u0650': '' # ِ
987
+ # '\u0652': '' # ْ
988
+ # '\u0653': '' # ٓ
989
+ # '\u0654': '' # ٔ
990
+ # '\u0655': '' # ٕ
991
+ # '\u0656': '' # ٖ
992
+ # '\u0657': '' # ٗ
993
+ # '\u0658': '' # ٘
994
+ # '\u0659': '' # ٙ
995
+ # '\u065a': '' # ٚ
996
+ # '\u065b': '' # ٛ
997
+ # '\u065c': '' # ٜ
998
+ # '\u065d': '' # ٝ
999
+ # '\u065e': '' # ٞ
1000
+ # '\u065f': '' # ٟ
1001
+ # '\u0660': '' # ٠
1002
+ sub "\u0661", "" # ١
1003
+ sub "\u0662", "" # ٢
1004
+ sub "\u0663", "" # ٣
1005
+ sub "\u0664", "" # ٤
1006
+ sub "\u0665", "" # ٥
1007
+ sub "\u0666", "" # ٦
1008
+ sub "\u0667", "" # ٧
1009
+ sub "\u0668", "" # ٨
1010
+ sub "\u0669", "" # ٩
1011
+ sub "\u066a", "" # ٪
1012
+ sub "\u066b", "" # ٫
1013
+ sub "\u066c", "" # ٬
1014
+ sub "\u066d", "" # ٭
1015
+ sub "\u066e", "" # ٮ
1016
+ sub "\u066f", "" # ٯ
1017
+ sub "\u0670", "" # ٰ
1018
+ sub "\u0671", "" # ٱ
1019
+ sub "\u0672", "" # ٲ
1020
+ sub "\u0673", "" # ٳ
1021
+ sub "\u0674", "" # ٴ
1022
+ sub "\u0675", "" # ٵ
1023
+ sub "\u0676", "" # ٶ
1024
+ sub "\u0677", "" # ٷ
1025
+ sub "\u0678", "" # ٸ
1026
+ sub "\u0679", "" # ٹ
1027
+ sub "\u067a", "" # ٺ
1028
+ sub "\u067b", "" # ٻ
1029
+ sub "\u067c", "" # ټ
1030
+ sub "\u067d", "" # ٽ
1031
+
1032
+ # '\u067e': 'p' # پ
1033
+ sub "\u067f", "" # ٿ
1034
+ sub "\u0680", "" # ڀ
1035
+ sub "\u0681", "" # ځ
1036
+ sub "\u0682", "" # ڂ
1037
+ sub "\u0683", "" # ڃ
1038
+ sub "\u0684", "" # ڄ
1039
+ sub "\u0685", "" # څ
1040
+
1041
+ # '\u0686': 'ch' # چ
1042
+ sub "\u0687", "" # ڇ
1043
+ sub "\u0688", "" # ڈ
1044
+ sub "\u0689", "" # ډ
1045
+ sub "\u068a", "" # ڊ
1046
+ sub "\u068b", "" # ڋ
1047
+ sub "\u068c", "" # ڌ
1048
+ sub "\u068d", "" # ڍ
1049
+ sub "\u068e", "" # ڎ
1050
+ sub "\u068f", "" # ڏ
1051
+ sub "\u0690", "" # ڐ
1052
+ sub "\u0691", "" # ڑ
1053
+ sub "\u0692", "" # ڒ
1054
+ sub "\u0693", "" # ړ
1055
+ sub "\u0694", "" # ڔ
1056
+ sub "\u0695", "" # ڕ
1057
+ sub "\u0696", "" # ږ
1058
+ sub "\u0697", "" # ڗ
1059
+
1060
+ # '\u0698': 'zh' # ژ
1061
+ sub "\u0699", "" # ڙ
1062
+ sub "\u069a", "" # ښ
1063
+ sub "\u069b", "" # ڛ
1064
+ sub "\u069c", "" # ڜ
1065
+ sub "\u069d", "" # ڝ
1066
+ sub "\u069e", "" # ڞ
1067
+ sub "\u069f", "" # ڟ
1068
+ sub "\u06a0", "" # ڠ
1069
+ sub "\u06a1", "" # ڡ
1070
+ sub "\u06a2", "" # ڢ
1071
+ sub "\u06a3", "" # ڣ
1072
+
1073
+ # '\u06a4': 'v' # ڤ
1074
+
1075
+ # '\u06a5': 'v' # ڥ
1076
+ sub "\u06a6", "" # ڦ
1077
+ sub "\u06a7", "" # ڧ
1078
+ sub "\u06a8", "" # ڨ
1079
+ sub "\u06a9", "" # ک
1080
+ sub "\u06aa", "" # ڪ
1081
+ sub "\u06ab", "" # ګ
1082
+ sub "\u06ac", "" # ڬ
1083
+ sub "\u06ad", "" # ڭ
1084
+ sub "\u06ae", "" # ڮ
1085
+
1086
+ # '\u06af': 'g' # گ
1087
+ sub "\u06b0", "" # ڰ
1088
+ sub "\u06b1", "" # ڱ
1089
+ sub "\u06b2", "" # ڲ
1090
+ sub "\u06b3", "" # ڳ
1091
+
1092
+ # '\u06b4': 'ñ' # ڴ
1093
+ sub "\u06b5", "" # ڵ
1094
+ sub "\u06b6", "" # ڶ
1095
+ sub "\u06b7", "" # ڷ
1096
+ sub "\u06b8", "" # ڸ
1097
+ sub "\u06b9", "" # ڹ
1098
+ sub "\u06ba", "" # ں
1099
+ sub "\u06bb", "" # ڻ
1100
+ sub "\u06bc", "" # ڼ
1101
+ sub "\u06bd", "" # ڽ
1102
+ sub "\u06be", "" # ھ
1103
+ sub "\u06bf", "" # ڿ
1104
+ sub "\u06c0", "" # ۀ
1105
+ sub "\u06c1", "" # ہ
1106
+ sub "\u06c2", "" # ۂ
1107
+ sub "\u06c3", "" # ۃ
1108
+ sub "\u06c4", "" # ۄ
1109
+ sub "\u06c5", "" # ۅ
1110
+ sub "\u06c6", "" # ۆ
1111
+ sub "\u06c7", "" # ۇ
1112
+ sub "\u06c8", "" # ۈ
1113
+ sub "\u06c9", "" # ۉ
1114
+ sub "\u06ca", "" # ۊ
1115
+
1116
+ # '\u06cb': 'v' # ۋ
1117
+ sub "\u06cc", "" # ی
1118
+ sub "\u06cd", "" # ۍ
1119
+ sub "\u06ce", "" # ێ
1120
+ sub "\u06cf", "" # ۏ
1121
+ sub "\u06d0", "" # ې
1122
+ sub "\u06d1", "" # ۑ
1123
+ sub "\u06d2", "" # ے
1124
+ sub "\u06d3", "" # ۓ
1125
+ sub "\u06d4", "" # ۔
1126
+ sub "\u06d5", "" # ە
1127
+ sub "\u06d6", "" # ۖ
1128
+ sub "\u06d7", "" # ۗ
1129
+ sub "\u06d8", "" # ۘ
1130
+ sub "\u06d9", "" # ۙ
1131
+ sub "\u06da", "" # ۚ
1132
+ sub "\u06db", "" # ۛ
1133
+ sub "\u06dc", "" # ۜ
1134
+ sub "\u06dd", "" # ۝
1135
+ sub "\u06de", "" # ۞
1136
+ sub "\u06df", "" # ۟
1137
+ sub "\u06e0", "" # ۠
1138
+ sub "\u06e1", "" # ۡ
1139
+ sub "\u06e2", "" # ۢ
1140
+ sub "\u06e3", "" # ۣ
1141
+ sub "\u06e4", "" # ۤ
1142
+ sub "\u06e5", "" # ۥ
1143
+ sub "\u06e6", "" # ۦ
1144
+ sub "\u06e7", "" # ۧ
1145
+ sub "\u06e8", "" # ۨ
1146
+ sub "\u06e9", "" # ۩
1147
+ sub "\u06ea", "" # ۪
1148
+ sub "\u06eb", "" # ۫
1149
+ sub "\u06ec", "" # ۬
1150
+ sub "\u06ed", "" # ۭ
1151
+ sub "\u06ee", "" # ۮ
1152
+ sub "\u06ef", "" # ۯ
1153
+ sub "\u06f0", "" # ۰
1154
+ sub "\u06f1", "" # ۱
1155
+ sub "\u06f2", "" # ۲
1156
+ sub "\u06f3", "" # ۳
1157
+ sub "\u06f4", "" # ۴
1158
+ sub "\u06f5", "" # ۵
1159
+ sub "\u06f6", "" # ۶
1160
+ sub "\u06f7", "" # ۷
1161
+ sub "\u06f8", "" # ۸
1162
+ sub "\u06f9", "" # ۹
1163
+ sub "\u06fa", "" # ۺ
1164
+ sub "\u06fb", "" # ۻ
1165
+ sub "\u06fc", "" # ۼ
1166
+ sub "\u06fd", "" # ۽
1167
+ sub "\u06fe", "" # ۾
1168
+ sub "\u06ff", "" # ۿ
1169
+ }
1170
+
1171
+ }