interscript 0.1.2 → 0.1.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (261) hide show
  1. checksums.yaml +4 -4
  2. data/README.adoc +250 -17
  3. data/lib/g2pwrapper.py +34 -0
  4. data/lib/interscript.rb +142 -20
  5. data/lib/interscript/command.rb +28 -0
  6. data/lib/interscript/fs.rb +69 -0
  7. data/lib/interscript/mapping.rb +142 -0
  8. data/lib/interscript/opal.rb +57 -0
  9. data/lib/interscript/opal/entrypoint.rb +12 -0
  10. data/lib/interscript/opal/map_translate.rb +7 -0
  11. data/lib/interscript/opal/maps.js.erb +10 -0
  12. data/lib/interscript/version.rb +1 -1
  13. data/lib/model-7 +0 -0
  14. data/lib/tha-pt-b-7 +0 -0
  15. data/maps/acadsin-zho-Hani-Latn-2002.yaml +38916 -0
  16. data/maps/alalc-amh-Ethi-Latn-1997.yaml +513 -0
  17. data/maps/alalc-amh-Ethi-Latn-2011.yaml +138 -0
  18. data/maps/alalc-ara-Arab-Latn-1997.yaml +1287 -0
  19. data/maps/alalc-asm-Deva-Latn-1997.yaml +165 -0
  20. data/maps/alalc-asm-Deva-Latn-2012.yaml +40 -0
  21. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +145 -0
  22. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +129 -0
  23. data/maps/alalc-ben-Beng-Latn-2017.yaml +130 -0
  24. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +98 -0
  25. data/maps/alalc-ell-Grek-Latn-1997.yaml +628 -0
  26. data/maps/alalc-ell-Grek-Latn-2010.yaml +626 -0
  27. data/maps/alalc-guj-Gujr-Latn-1997.yaml +266 -0
  28. data/maps/alalc-guj-Gujr-Latn-2011.yaml +64 -0
  29. data/maps/alalc-hin-Deva-Latn-1997.yaml +211 -0
  30. data/maps/alalc-hin-Deva-Latn-2011.yaml +47 -0
  31. data/maps/alalc-kat-Geok-Latn-1997.yaml +111 -0
  32. data/maps/alalc-kat-Geor-Latn-1997.yaml +150 -0
  33. data/maps/alalc-kor-Hang-Latn-1997.yaml +98 -0
  34. data/maps/alalc-mal-Mlym-Latn-1997.yaml +303 -0
  35. data/maps/alalc-mal-Mlym-Latn-2012.yaml +73 -0
  36. data/maps/alalc-mar-Deva-Latn-1997.yaml +189 -0
  37. data/maps/alalc-mar-Deva-Latn-2011.yaml +45 -0
  38. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +114 -0
  39. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +103 -0
  40. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +220 -0
  41. data/maps/alalc-pan-Guru-Latn-1997.yaml +256 -0
  42. data/maps/alalc-pan-Guru-Latn-2011.yaml +78 -0
  43. data/maps/alalc-per-Arab-Latn-1997.yaml +375 -0
  44. data/maps/alalc-pli-Deva-Latn-2012.yaml +144 -0
  45. data/maps/alalc-pra-Deva-Latn-2012.yaml +47 -0
  46. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +225 -0
  47. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +162 -0
  48. data/maps/alalc-san-Deva-Latn-2012.yaml +172 -0
  49. data/maps/alalc-sin-Sinh-Latn-1997.yaml +292 -0
  50. data/maps/alalc-sin-Sinh-Latn-2011.yaml +71 -0
  51. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +118 -0
  52. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +135 -0
  53. data/maps/alalc-tam-Taml-Latn-1997.yaml +62 -0
  54. data/maps/alalc-tam-Taml-Latn-2011.yaml +58 -0
  55. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +145 -0
  56. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +16 -0
  57. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +283 -0
  58. data/maps/{bas-rus-Cyrl-Latn-bss.yaml → bas-rus-Cyrl-Latn-2017-bss.yaml} +58 -33
  59. data/maps/{bas-rus-Cyrl-Latn-oss.yaml → bas-rus-Cyrl-Latn-2017-oss.yaml} +55 -35
  60. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +292 -0
  61. data/maps/bgn-kor-Hang-Latn-1943.yaml +35 -0
  62. data/maps/bgn-kor-Kore-Latn-1943.yaml +31 -0
  63. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +208 -0
  64. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +208 -0
  65. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +532 -0
  66. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +596 -0
  67. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +2 -3
  68. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +104 -0
  69. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +188 -0
  70. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +289 -0
  71. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +119 -0
  72. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +15 -65
  73. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +184 -0
  74. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +705 -0
  75. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +23 -0
  76. data/maps/{bgnpcgn-per-Arab-Latn-1956.yaml → bgnpcgn-fas-Arab-Latn-1956.yaml} +5 -2
  77. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +257 -0
  78. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +131 -0
  79. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +42 -0
  80. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +253 -0
  81. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +48 -0
  82. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +48 -0
  83. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +163 -0
  84. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +190 -0
  85. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +223 -0
  86. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +230 -0
  87. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +336 -0
  88. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +639 -0
  89. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +459 -0
  90. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +168 -0
  91. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +150 -65
  92. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +170 -0
  93. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +220 -0
  94. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +240 -0
  95. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +80 -4
  96. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +119 -0
  97. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +127 -0
  98. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +82 -0
  99. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +7456 -0
  100. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +159 -0
  101. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +156 -0
  102. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +184 -0
  103. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +181 -0
  104. data/maps/bis-knd-Knda-Latn-13194-1991.yaml +173 -0
  105. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +176 -0
  106. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +160 -0
  107. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +175 -0
  108. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +170 -0
  109. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +155 -0
  110. data/maps/by-bel-Cyrl-Latn-1998.yaml +172 -0
  111. data/maps/by-bel-Cyrl-Latn-2007.yaml +115 -0
  112. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +899 -0
  113. data/maps/din-hin-Deva-Latn-33904-2018.yaml +100 -0
  114. data/maps/din-kat-Geor-Latn-32707-2010.yaml +145 -0
  115. data/maps/din-mar-Deva-Latn-33904-2018.yaml +84 -0
  116. data/maps/din-nep-Deva-Latn-33904-2018.yaml +119 -0
  117. data/maps/din-pli-Deva-Latn-33904-2018.yaml +75 -0
  118. data/maps/din-pra-Deva-Latn-33904-2018.yaml +63 -0
  119. data/maps/din-san-Deva-Latn-33904-2018.yaml +338 -0
  120. data/maps/din-tam-Taml-Latn-33903-2016.yaml +213 -0
  121. data/maps/dos-nep-Deva-Latn-1997.yaml +47 -0
  122. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +684 -0
  123. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +680 -0
  124. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +19 -0
  125. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +31 -0
  126. data/maps/ggg-kat-Geor-Latn-2002.yaml +92 -0
  127. data/maps/gki-bel-Cyrl-Latn-1992.yaml +33 -0
  128. data/maps/gki-bel-Cyrl-Latn-2000.yaml +201 -0
  129. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +190 -0
  130. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +157 -0
  131. data/maps/hk-yue-Hani-Latn-1888.yaml +38497 -0
  132. data/maps/icao-bel-Cyrl-Latn-9303.yaml +109 -98
  133. data/maps/icao-bul-Cyrl-Latn-9303.yaml +2 -7
  134. data/maps/{icao-per-Arab-Latn-9303.yaml → icao-fas-Arab-Latn-9303.yaml} +6 -8
  135. data/maps/icao-heb-Hebr-Latn-9303.yaml +119 -125
  136. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +2 -3
  137. data/maps/icao-rus-Cyrl-Latn-9303.yaml +2 -4
  138. data/maps/icao-srp-Cyrl-Latn-9303.yaml +2 -3
  139. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +2 -4
  140. data/maps/iso-ara-Arab-Latn-233-1984.yaml +323 -0
  141. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +75 -0
  142. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +175 -0
  143. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +613 -0
  144. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +44 -0
  145. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +220 -0
  146. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +87 -0
  147. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +61 -0
  148. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +66 -0
  149. data/maps/iso-kan-Knda-Latn-15919-2001.yaml +220 -0
  150. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +145 -0
  151. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +240 -0
  152. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +226 -0
  153. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +281 -0
  154. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +75 -0
  155. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +87 -0
  156. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +193 -0
  157. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +222 -0
  158. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +73 -0
  159. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +74 -0
  160. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +219 -0
  161. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +55 -0
  162. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +59 -0
  163. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +366 -0
  164. data/maps/{iso-rus-Cyrl-Latn-iso9.yaml → iso-rus-Cyrl-Latn-9-1995.yaml} +4 -6
  165. data/maps/iso-san-Deva-Latn-15919-2001.yaml +220 -0
  166. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +159 -0
  167. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +220 -0
  168. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +109 -0
  169. data/maps/kp-kor-Hang-Latn-2002.yaml +909 -0
  170. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +44820 -0
  171. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +411 -0
  172. data/maps/mns-mon-Cyrl-Latn-5217-2012.yaml +163 -0
  173. data/maps/mns-mon-Latn-Cyrl-5217-2012.yaml +200 -0
  174. data/maps/moct-kor-Hang-Latn-2000.yaml +807 -0
  175. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +541 -0
  176. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +225 -0
  177. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +63 -0
  178. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +109 -0
  179. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +37 -0
  180. data/maps/odni-ara-Arab-Latn-2015.yaml +425 -0
  181. data/maps/odni-aze-Cyrl-Latn-2015.yaml +144 -0
  182. data/maps/odni-bel-Cyrl-Latn-2015.yaml +148 -0
  183. data/maps/odni-bul-Cyrl-Latn-2015.yaml +96 -0
  184. data/maps/odni-che-Cyrl-Latn-2015.yaml +169 -0
  185. data/maps/odni-fas-Arab-Latn-2015.yaml +406 -0
  186. data/maps/odni-hin-Deva-Latn-2015.yaml +258 -0
  187. data/maps/odni-kat-Geor-Latn-2015.yaml +87 -0
  188. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +148 -0
  189. data/maps/odni-kir-Cyrl-Latn-2015.yaml +136 -0
  190. data/maps/odni-kor-Hang-Latn-2015.yaml +375 -0
  191. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +122 -0
  192. data/maps/odni-per-Arab-Latn-2015.yaml +228 -0
  193. data/maps/odni-rus-Cyrl-Latn-2015.yaml +77 -0
  194. data/maps/odni-srp-Cyrl-Latn-2015.yaml +129 -0
  195. data/maps/odni-tat-Cyrl-Latn-2015.yaml +142 -0
  196. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +148 -0
  197. data/maps/odni-uig-Cyrl-Latn-2015.yaml +138 -0
  198. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +157 -0
  199. data/maps/odni-urd-Arab-Latn-2015.yaml +221 -0
  200. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +166 -0
  201. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +90 -0
  202. data/maps/royin-tha-Thai-Latn-1968.yaml +183 -0
  203. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +180 -0
  204. data/maps/royin-tha-Thai-Latn-1999.yaml +80 -0
  205. data/maps/{cn-chn-Hans-Latn-pinyin.yaml → sac-zho-Hans-Latn-1979.yaml} +11 -8
  206. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +389 -0
  207. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +354 -0
  208. data/maps/ses-ara-Arab-Latn-1930.yaml +283 -0
  209. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +222 -0
  210. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +197 -0
  211. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +75 -0
  212. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +192 -0
  213. data/maps/un-amh-Ethi-Latn-2016.yaml +602 -0
  214. data/maps/un-ara-Arab-Latn-1971.yaml +139 -0
  215. data/maps/un-ara-Arab-Latn-1972.yaml +159 -0
  216. data/maps/un-ara-Arab-Latn-2017.yaml +420 -0
  217. data/maps/un-bel-Cyrl-Latn-2007.yaml +114 -0
  218. data/maps/un-ben-Beng-Latn-2016.yaml +534 -0
  219. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +780 -0
  220. data/maps/un-ell-Grek-Latn-1987-tl.yaml +31 -0
  221. data/maps/un-ell-Grek-Latn-1987-ts.yaml +19 -0
  222. data/maps/un-hin-Deva-Latn-2016.yaml +222 -0
  223. data/maps/un-mar-Deva-Latn-2016.yaml +91 -0
  224. data/maps/un-mon-Mong-Latn-general-2013.yaml +264 -0
  225. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +264 -0
  226. data/maps/un-nep-Deva-Latn-1972.yaml +350 -0
  227. data/maps/un-nep-Deva-Latn-2013.yaml +74 -0
  228. data/maps/un-rus-Cyrl-Latn-1987.yaml +166 -0
  229. data/maps/un-ukr-Cyrl-Latn-1998.yaml +53 -0
  230. data/maps/un-ukr-Cyrl-Latn-2012.yaml +162 -0
  231. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +221 -0
  232. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +406 -0
  233. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +386 -0
  234. data/maps/var-kor-Hang-Hang-jamo.yaml +11193 -0
  235. data/maps/var-kor-Hang-Latn-mr-1939.yaml +1054 -0
  236. data/maps/var-kor-Kore-Hang-2013.yaml +59754 -0
  237. data/maps/var-kor-Kore-Latn-mr-1939.yaml +36 -0
  238. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +43 -0
  239. data/maps/var-mon-Mong-Latn-1930.yaml +102 -0
  240. data/maps/var-mon-Mong-Latn-lessing.yaml +272 -0
  241. data/maps/var-mon-Mong-Latn-vpmc.yaml +274 -0
  242. data/maps/var-pra-Deva-Latn-iast-1912.yaml +30 -0
  243. data/maps/var-san-Deva-Latn-iast-1912.yaml +149 -0
  244. data/maps/var-tha-Thai-Thai-phonemic.yaml +59 -0
  245. data/maps/var-tha-Thai-Zsym-ipa.yaml +301 -0
  246. data/maps/var-zho-Hani-Latn-wd-1979.yaml +38912 -0
  247. data/spec/interscript/filenames_spec.rb +384 -0
  248. data/spec/interscript/mapping_spec.rb +42 -0
  249. data/spec/interscript_spec.rb +23 -5
  250. data/spec/spec_helper.rb +3 -1
  251. metadata +364 -34
  252. data/bin/interscript +0 -20
  253. data/bin/rspec +0 -29
  254. data/maps/bgnpcgn-chn-Hans-Latn-pinyin.yaml +0 -7503
  255. data/maps/historic-jpn-Hrkt-Latn-hepburn.yaml +0 -336
  256. data/maps/icao-gre-Grek-Latn-9303.yaml +0 -101
  257. data/maps/mext-jpn-Hrkt-Latn-hepburn.yaml +0 -330
  258. data/maps/mext-jpn-Hrkt-Latn-kunrei.yaml +0 -308
  259. data/maps/un-jpn-Hrkt-Latn-hepburn.yaml +0 -313
  260. data/maps/un-jpn-Hrkt-Latn-kunrei.yaml +0 -354
  261. data/maps/un-mon-Mong-Latn-2013.yaml +0 -80
@@ -0,0 +1,138 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 2011
4
+ language: iso-639-2:amh
5
+ source_script: Ethi
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Amharic (2011)
8
+ url: https://www.loc.gov/catdir/cpso/romanization/amharic.pdf
9
+ creation_date: 2011
10
+ description: ''
11
+ notes: |
12
+ The Ethiopic script used for Amharic is also used for other languages, including Ge’ez, Argobba, Gurage, and Tigre. Ge’ez, which is chiefly a liturgical language, uses only 26 basic letter forms from this table.
13
+
14
+ tests:
15
+ - source: የዜግነት ክብር በ ኢትዮጵያችን ጸንቶ
16
+ expected: yazégenate kebere ba ʼiteyop̣eyāčene ṣaneto
17
+ - source: ታየ ሕዝባዊነት ዳር እስከዳር በርቶ
18
+ expected: tāya ḥezebāwinate dāre ʼesekadāre bareto
19
+ - source: ለሰላም ለፍትህ ለሕዝቦች ነጻነት
20
+ expected: lasalāme lafetehe laḥezeboče naṣānate
21
+ - source: በእኩልነት በፍቅር ቆመናል ባንድነት
22
+ expected: baʼekulenate bafeqere qomanāle bānedenate
23
+ - source: መሠረተ ፅኑ ሰብዕናን ያልሻርን
24
+ expected: maśarata ṡenu sabeʻenāne yālešārene
25
+ - source: ሕዝቦች ነን ለሥራ በሥራ የኖርን
26
+ expected: ḥezeboče nane laśerā baśerā yanorene
27
+ - source: ድንቅ የባህል መድረክ ያኩሪ ቅርስ ባለቤት
28
+ expected: deneqe yabāhele maderake yākuri qerese bālabéte
29
+ - source: የተፈጥሮ ጸጋ የጀግና ሕዝብ እናት
30
+ expected: yatafaṭero ṣagā yaǧagenā ḥezebe ʼenāte
31
+ - source: እንጠብቅሻለን አለብን አደራ
32
+ expected: ʼeneṭabeqešālane ʼalabene ʼadarā
33
+ - source: ኢትዮጵያችን ኑሪ እኛም ባንቺ እንኩራ
34
+ expected: ʼiteyop̣eyāčene nuri ʼeñāme bāneči ʼenekurā
35
+ - source: ቋንቋ የድምጽ፣ የምልክት ወይም የምስል ቅንብር ሆኖ
36
+ expected: qwāneqwā yademeṣe፣ yamelekete wayeme yamesele qenebere hono
37
+ - source: ለማሰብ ወይም የታሰበን ሃሳብ ለሌላ ለማስተላለፍ የሚረዳ መሳሪያ ነው
38
+ expected: lamāsabe wayeme yatāsabane hāsābe lalélā lamāsetalālafe yamiradā masāriyā nawe
39
+ - source: በአጭሩ ቋንቋ የምልክቶች ስርዓትና እኒህን ምልክቶች ለማቀናበር
40
+ expected: baʼaċeru qwāneqwā yameleketoče sereʻātenā ʼenihene meleketoče lamāqanābare
41
+ - source: የሚያስፈልጉ ህጎች ጥንቅር ነው። ቋንቋወችን ለመፈረጅ እንዲሁም
42
+ expected: yamiyāsefalegu hegoče ṭeneqere nawe። qwāneqwāwačene lamafaraǧe ʼenedihume
43
+ - source: ለምክፈል የሚያስችሉ መስፈርቶችን ለማስቀመጥ ባለው ችግር
44
+ expected: lamekefale yamiyāsečelu masefaretočene lamāseqamaṭe bālawe čegere
45
+ - source: ምክንያት በአሁኑ ሰዓት በርግጠኝነት ስንት ቋንቋ በዓለም ላይ
46
+ expected: mekeneyāte baʼahunu saʻāte baregeṭañenate senete qwāneqwā baʻālame lāye
47
+ - source: እንዳለ ማወቅ አስቸጋሪ ነው
48
+ expected: ʼenedāla māwaqe ʼasečagāri nawe
49
+ - source: አሰላ
50
+ expected: ʼasalā
51
+ - source: አሶሳ
52
+ expected: ʼasosā
53
+ - source: አንኮበር
54
+ expected: ʼanekobare
55
+ - source: አክሱም
56
+ expected: ʼakesume
57
+ - source: አዋሳ
58
+ expected: ʼawāsā
59
+ - source: አዲስ ዘመን (ከተማ)
60
+ expected: ʼadise zamane (katamā)
61
+ - source: አዲግራት
62
+ expected: ʼadigerāte
63
+ - source: አዳማ
64
+ expected: ʼadāmā
65
+ - source: ደምበጫ
66
+ expected: damebaċā
67
+ - source: ደርባ
68
+ expected: darebā
69
+ - source: ደብረ ማርቆስ
70
+ expected: dabera māreqose
71
+ - source: ደብረ ብርሃን
72
+ expected: dabera berehāne
73
+ - source: ደብረ ታቦር (ከተማ)
74
+ expected: dabera tābore (katamā)
75
+ - source: ደብረ ዘይት
76
+ expected: dabera zayete
77
+ - source: ደገሃቡር
78
+ expected: dagahābure
79
+ - source: ወልቂጤ
80
+ expected: waleqiṭé
81
+ - source: ወልወል
82
+ expected: walewale
83
+ - source: ወልደያ
84
+ expected: waledayā
85
+ - source: ናይሎ ሳህራን
86
+ expected: nāyelo sāherāne
87
+ - source: አኙዋክኛ
88
+ expected: ʼañuwākeñā
89
+ - source: ኡዱክኛ
90
+ expected: ʼudukeñā
91
+ - source: ኦፓኛ
92
+ expected: ʼopāñā
93
+ - source: ጉምዝኛ
94
+ expected: gumezeñā
95
+ - source: አፋርኛ
96
+ expected: ʼafāreñā
97
+ - source: አላባኛ
98
+ expected: ʼalābāñā
99
+ - source: አርቦርኛ
100
+ expected: ʼareboreñā
101
+ - source: ባይሶኛ
102
+ expected: bāyesoñā
103
+ - source: ቡሳኛ
104
+ expected: busāñā
105
+ - source: ራስ ዓሊ (ትልቁ) ፬
106
+ expected: rāse ʻāli (telequ) 4
107
+ - source: ራስ ዓሊጋዝ ፭
108
+ expected: rāse ʻāligāze 5
109
+ - source: ራስ ዐሥራትና ፮
110
+ expected: rāse ʻaśerātenā 6
111
+ - source: ራስ ጉግሣ ፳፮
112
+ expected: rāse gugeśā 206
113
+ - source: ራስ ይማም ፪
114
+ expected: rāse yemāme 2
115
+ - source: ራስ ማርዬ ፫
116
+ expected: rāse māreyé 3
117
+ - source: ራስ ዶሪ ፫ ወር
118
+ expected: rāse dori 3 ware
119
+ - source: ራስ ዓሊ (ትንሹ) ፳
120
+ expected: rāse ʻāli (tenešu) 20
121
+ - source: ዓፄ ቴዎድሮስ ፲፭
122
+ expected: ʻāṡé téwoderose 105
123
+ - source: ዳግማዊ ዓጼ ተክለ ጊዮርጊስ ፫
124
+ expected: dāgemāwi ʻāṣé takela giyoregise 3
125
+ - source: ዓፄ ዮሐንስ ፲፰
126
+ expected: ʻāṡé yoḥanese 108
127
+ - source: ዳግማዊ ዓጼ ምኒልክ ፳፬
128
+ expected: dāgemāwi ʻāṣé menileke 204
129
+ - source: ልጅ ኢያሱ ፫
130
+ expected: leǧe ʼiyāsu 3
131
+ - source: ንግሥት ዘውዲቱ ፲፫
132
+ expected: negeśete zaweditu 103
133
+ - source: ቀዳማዊ ኃይለ ሥላሴ
134
+ expected: qadāmāwi hāyela śelāsé
135
+
136
+
137
+ map:
138
+ inherit: alalc-amh-Ethi-Latn-1997
@@ -0,0 +1,1287 @@
1
+ ---
2
+ authority_id: alalc
3
+ id: 1997
4
+ language: iso-639-2:ara
5
+ source_script: Arab
6
+ destination_script: Latn
7
+ name: ALA-LC Romanization Table -- Arabic (1997)
8
+ alias:
9
+ ogc11122:
10
+ code: ara_Arab2Latn_ALA_1997
11
+ description: Arabic ALA-Library of Congress 1997 System
12
+ url: http://catdir.loc.gov/catdir/cpso/romanization/arabic.pdf
13
+ creation_date: 1997
14
+ description: |
15
+ ALA-LC Romanization table for Arabic
16
+
17
+ notes:
18
+ - For the use of alif to support hamzah, see rule 2. For the romanization of hamzah by the consonantal sign ’ (alif), see rule 8(a). For other orthographic uses of alif see rules 3-5.
19
+
20
+ - The Maghribī variations ڢ and ڧ are romanized f and q respectively.
21
+
22
+ - ة in a word in the construct state is romanized t. See rule 7(b).
23
+
24
+ # Arabic Letters Romanized in Different Ways Depending on Their Context
25
+ - |
26
+ Rule 1 As indicated in the table, ﻭ and ي may represent:
27
+
28
+ (a) The consonants romanized w and y, respectively.
29
+
30
+ waḍ‘ وضع
31
+ ‘iwaḍ عوض
32
+ dalw دلو
33
+ yad يد
34
+ ḥiyal حيل
35
+ ṭahy طهي
36
+
37
+ (b) The long vowels romanized ū, ī, and ā respectively.
38
+
39
+ ūlá أولى
40
+ ṣūrah صورة
41
+ dhū ذو
42
+ īmān إيمان
43
+ jīl جيل
44
+ fī في
45
+ kitāb كتاب
46
+ saḥāb سحاب
47
+ jumān جمان
48
+
49
+ See also rules 11(a) and 11(b)(1-2).
50
+
51
+ (c) The diphthongs romanized aw and ay, respectively.
52
+
53
+ awj أوج
54
+ nawm نوم
55
+ law لو
56
+ aysar أيسر
57
+ shaykh شيخ
58
+ ‘aynay عيني
59
+
60
+ - Rule 2 ا (alif), و and ى when used to support ء (hamzah) are not represented in romanization. See rule 8(a).
61
+
62
+ - Rule 3 ا (alif) when used to support waṣlah ( ٱ ) and maddah ( آ ) is not represented in romanization. See rules 9 and 10.
63
+
64
+ - |
65
+ Rule 4 ا (alif) and و when used as orthographic signs without phonetic significance are not represented in romanization.
66
+
67
+ fa‘alū فعلوا
68
+ ulā’ika أولائك
69
+ ūqīyah أوقية
70
+
71
+ See also rule 12 and examples cited in rules 23-26.
72
+
73
+
74
+ - |
75
+ Rule 5 ا (alif) is used to represent the long vowel romanized ā, as indicated in the table.
76
+
77
+ fā‘il فاعل
78
+ riḍā رضا
79
+
80
+ This alif, when medial, is sometimes omitted in Arabic; it is always indicated in romanization. See rule 19.
81
+
82
+ - |
83
+ Rule 6 Final ى appears in the following special cases:
84
+
85
+ (a) As ﻯ َ (alif maqṣūrah) used in place of َا to represent the long vowel romanized ā.
86
+
87
+ ḥattá حتَّى
88
+ maḍá مضَى
89
+ kubrá كبرَى
90
+ Yaḥyá يحيَى
91
+ musammá مسمَّى
92
+ Muṣṭafá مصطفَى
93
+
94
+ (b) As ِ ﻯّ in nouns and adjectives of the form fā‘īl which are derived from defective roots. This ending is romanized ī, not īy, without regard to the presence of ّ (shaddah). See rule 11(b)(2).
95
+
96
+ Raḍī al-Dīn رضي الدين
97
+
98
+ Compare the fa‘īl form of the same root الرضى [without shaddah] al-Raḍī.
99
+
100
+ (c) As ِ ﻯّ in the relative adjective (nisbah). The ending, like (b) above, is romanized ī, not īy.
101
+
102
+ al-Miṣrī المصرِيّ
103
+
104
+ Compare المصرِيّة al-Miṣrīyah and see rule 11(b)(1).
105
+
106
+ - |
107
+ Rule 7 ة (tā’ marbūṭah)
108
+
109
+ (a) When the noun or adjective ending in ة is indefinite, or is preceded by the definite article, ة is romanized h. The ة in such positions is often replaced by ه.
110
+
111
+ ṣalāh صلاة
112
+ al-Risālah al-bahīyah الرسالة البهية
113
+ mir’āh مرآة
114
+ Urjūzah fī al-ṭibb أرجوزة فى الطب
115
+
116
+
117
+ (b) When the word ending in ة is in the construct state [muḍāf wa-muḍāf ilayh], ة is romanized t.
118
+
119
+ Wizārat al-Tarbiyah وزارة التربية
120
+ Mir’āt al-zamān مرآة الزمان
121
+
122
+
123
+ (c) When the word ending in ة is used adverbially, ة (vocalized ةً) is romanized tan. See rule 12(b).
124
+
125
+ - |
126
+ Rule 8 ء (hamzah)
127
+
128
+ (a) In initial position, whether at the beginning of a word, following a prefixed preposition or conjunction, or following the definite article, ء is not represented in romanization. When medial or final, ء is romanized as ’ (alif).
129
+
130
+ asad أسد
131
+ uns أنس
132
+ idhā إذا
133
+ mas’alah مسألة
134
+ mu’tamar مؤتمر
135
+ dā’im دائم
136
+ mala’a ملأ
137
+ khaṭi’a خطئ
138
+
139
+ (b) ء, when replaced by the sign (waṣlah) and then known as hamzat al-waṣl, is not represented in romanization. See rule 9 below.
140
+
141
+ (waṣlah), like initial ء, is not represented in romanization. See also rule 8(b) above. When the alif which supports waṣlah belongs to the article ال, the initial vowel of the article is romanized a. See rule 17(b). In other words, beginning with hamzat al-waṣl, the initial vowel is romanized i.
142
+
143
+ Riḥlat Ibn Jubayr رحلة ٱبن جبير
144
+ al-istidrāk الإستدراك
145
+ kutub iqtanatʹhā كتب ٱقتنتها
146
+ bi-ihtimām ‘Abd al-Majīd باهتمام عبد ٱلمجيد
147
+
148
+ - |
149
+ Rule 9 (waṣlah), like initial ء, is not represented in romanization.
150
+ See also rule 8(b) above. When the alif which supports waṣlah belongs to the article ال, the initial vowel of the article is romanized a.
151
+ See rule 17(b). In other words, beginning with hamzat al-waṣl, the initial vowel is romanized i.
152
+
153
+ Riḥlat Ibn Jubayr رحلة ٱبن جبير
154
+ al-istidrāk الإستدراك
155
+ kutub iqtanatʹhā كتب ٱقتنتها
156
+ bi-ihtimām ‘Abd al-Majīd باهتمام عبد ٱلمجيد
157
+
158
+ - |
159
+ Rule 10 ˜ (maddah)
160
+
161
+ (a) Initial آ is romanized ā.
162
+
163
+ ālah آلة
164
+ Kullīyat al-Ādāb كلية الآداب
165
+
166
+ (b) Medial آ, when it represents the phonetic combination ’ā, is so romanized.
167
+
168
+ ta’ālīf تآليف
169
+ ma’āthir مآثر
170
+
171
+ (c) ˜ is otherwise not represented in romanization.
172
+
173
+ khulafā’ خلفآء
174
+ - |
175
+ Rule 11 ّ (shaddah or tashdīd)
176
+
177
+ (a) Over و
178
+
179
+ (1) ُوّ, representing the combination of long vowel plus consonant, is romanized ūw.
180
+
181
+ ‘adūw عدُوّ
182
+ qūwah قُوّة
183
+
184
+ (2) َوّ, representing the combination of diphthong plus consonant, is romanized aww.
185
+
186
+ Shawwāl شَوّال
187
+ ṣawwara صَوّر
188
+ jaww جوّ
189
+
190
+
191
+ See also rule 1(c).
192
+
193
+ (b) Over ى
194
+
195
+ (1) Medial ِىّ, representing the combination of long vowel plus consonant, is romanized īy.
196
+
197
+ al-Miṣrīyah المصرِيّة
198
+
199
+ See also rule 1(b).
200
+
201
+ (2) Final ِىّ is romanized ī. See rules 6(b) and 6(c).
202
+
203
+ (3) Medial and final َىّ, representing the combination of diphthong plus consonant, is romanized ayy.
204
+
205
+ ayyām أَيّام
206
+ sayyid سَيّد
207
+ Quṣayy قصَيّ
208
+
209
+ See also rule 1(c).
210
+
211
+ (c) Over other letters, ّ is represented in romanization by doubling the letter or digraph concerned.
212
+
213
+ al-Ghazzī الغزّيّ
214
+ al-Kashshāf الكشّاف
215
+
216
+ - |
217
+ Rule 12 Tanwīn may take the written form ٌ, ً (ًا), or ٍ, romanized un, an, and in, respectively. Tanwīn is normally disregarded in romanization, however. It is indicated in the following cases:
218
+
219
+ (a) When it occurs in indefinite nouns derived from defective roots.
220
+
221
+ qāḍin قاضٍ
222
+ ma‘nan معنىً
223
+
224
+ (b) When it indicates the adverbial use of a noun or adjective.
225
+
226
+ ṭab‘an طبعًا
227
+ faj’atan فجأةً
228
+ al-Mushtarik waḍ‘an المشترك وضعاً
229
+ wa-al-muftariq ṣuq‘an والمفترق صقعاً
230
+ - |
231
+ Rule 13 Tanwīn may take the written form ٌ, ً (ًا), or ٍ, romanized un, an, and in, respectively. Tanwīn is normally disregarded in romanization, however. It is indicated in the following cases:
232
+
233
+ (a) When it occurs in indefinite nouns derived from defective roots.
234
+
235
+ qāḍin قاضٍ
236
+ ma‘nan معنىً
237
+
238
+ (b) When it indicates the adverbial use of a noun or adjective.
239
+
240
+ ṭab‘an طبعًا
241
+ faj’atan فجأةً
242
+ al-Mushtarik waḍ‘an المشترك وضعاً
243
+ wa-al-muftariq ṣuq‘an والمفترق صقعاً
244
+
245
+ # Grammatical Structure as It Affects Romanization
246
+ - |
247
+ Rule 13 Final inflections of verbs are retained in romanization, except in pause.
248
+
249
+ man waliya Miṣr من ولي مصر
250
+ ma‘rifat mā yajibu la-hum معرفة ما يجب لهم
251
+ ṣallá Allāh ‘alayhi wa-sallam صلى الله عليه وسلم
252
+ al-Lu’lu’ al-maknūn fī ḥukm اللؤلؤ المكنون فى حكم
253
+ al-ikhbār ‘ammā sa-yakūn الإخبار عما سيكون
254
+
255
+ - |
256
+ Rule 14 Final inflections of nouns and adjectives:
257
+
258
+ (a) Vocalic endings are not represented in romanization, except preceding pronominal suffixes, and except when the text being romanized is in verse.
259
+
260
+ uṣūluhā al-nafsīyah wa-ṭuruq أصولها النفسية وطرق تدريسها
261
+ tadrīsihā
262
+ ilá yawminā hādhā الى يومنا هذا
263
+
264
+ (b) Tanwīn is not represented in romanization, except as specified in rule 12.
265
+
266
+ (c) ة (tā’ marbūṭah) is romanized h or t as specified in rule 7.
267
+
268
+ (d) For the romanization of the relative adjective (nisbah) see rule 6(c).
269
+
270
+ - |
271
+ Rule 15 Pronouns, pronominal suffixes, and demonstratives:
272
+
273
+ (a) Vocalic endings are retained in romanization.
274
+
275
+ anā wa-anta انا وانت
276
+ hādhihi al-ḥāl هذه الحال
277
+ mu’allafātuhu wa-shurūḥuhā مؤلفاته وشروحها
278
+
279
+ (b) At the close of a phrase or sentence, the ending is romanized in its pausal form.
280
+
281
+ ḥayātuhu wa-‘aṣruh حياته وعصره
282
+ Tawfīq al-Ḥakīm, afkāruh, توفيق الحكيم، أفكاره، آثاره
283
+ āthāruh
284
+
285
+ - |
286
+ Rule 16 Prepositions and conjunctions:
287
+
288
+ (a) Final vowels of separable prepositions and conjunctions are retained in romanization.
289
+
290
+ anna أن
291
+ annahu أنه
292
+ bayna yadayhi بين يديه
293
+
294
+ Note the special cases مما mimmā, ممن mimman.
295
+
296
+ (b) Inseparable prepositions, conjunctions, and other prefixes are connected with what follows by a hyphen.
297
+
298
+ bi-hi به
299
+ wa-ma‘ahu ومعه
300
+ lā-silkī لاسلكي
301
+ - |
302
+ Rule 17 The definite article:
303
+
304
+ (a) The romanized form al is connected with the following word by a hyphen.
305
+
306
+ al-kitāb al-thānī الكتاب الثاني
307
+ al-ittiḥād الإتحاد
308
+ al-aṣl الأصل
309
+ al-āthār الآثار
310
+
311
+ (b) When ال is initial in the word, and when it follows an inseparable preposition or conjunction, it is always romanized al regardless of whether the preceding word, as romanized, ends in a vowel or a consonant.
312
+
313
+ ilá al-ān الى الآن
314
+ Abū al-Wafā’ ابو الوفاء
315
+ Maktabat al-Nahḍah al-Miṣrīyah مكتبة النهضة المصرية
316
+ bi-al-tamām wa-al-kamāl بالتمام والكمال
317
+
318
+ Note the exceptional treatment of the preposition ل followed by the article:
319
+
320
+ lil-Shirbīnī للشربيني
321
+
322
+ See also rule 23.
323
+
324
+ (c) The ل of the article is always romanized l, whether it is followed by a “sun letter” or not, i.e., regardless of whether or not it is assimilated in pronunciation to the initial consonant of the word to which it is attached.
325
+
326
+ al-ḥurūf al-abjadīyah الحروف الأبجدية
327
+ Abū al-Layth al-Samarqandī ابو الليث السمرقندي
328
+
329
+ - |
330
+ Rule 18 Capitalization:
331
+
332
+ (a) Rules for the capitalization of English are followed, except that the definite article al is given in lower case in all positions.
333
+
334
+ (b) Diacritics are used with both upper and lower case letters.
335
+
336
+ al-Ījī الايجي
337
+ al-Ālūsī الآلوسي
338
+
339
+ - |
340
+ Rule 19 The macron or the acute accent, as appropriate, is used to indicate all long vowels, including those which in Arabic script are written defectively. The macron or the acute accent, as the case may be, is retained over final long vowels which are shortened in pronunciation before hamzat al-waṣl.
341
+
342
+ Ibrāhīm إبراهيم ، إبرهيم
343
+ Dā’ūd داؤود ، داؤد
344
+ Abū al-Ḥasan ابو الحسن
345
+ ru’ūs رؤوس
346
+ dhālika ذلك
347
+ ‘alá al-‘ayn على العين
348
+
349
+ - |
350
+ Rule 20 The hyphen is used:
351
+
352
+ (a) To connect the definite article al with the word to which it is attached. See rule 17(a).
353
+
354
+ (b) Between an inseparable prefix and what follows. See rules 16(b) and 17(b) above.
355
+
356
+ (c) Between bin and the following element in personal names when they are written in Arabic as a single word. See rule 25.
357
+
358
+ - |
359
+ Rule 21 The prime ( ʹ ) is used:
360
+
361
+ (a) To separate two letters representing two distinct consonantal sounds, when the combination might otherwise be read as a digraph.
362
+
363
+ Adʹham أدهم
364
+ akramatʹhā أكرمتها
365
+
366
+ (b) To mark the use of a letter in its final form when it occurs in the middle of a word.
367
+
368
+ Qal‘ahʹjī قلعه‌جى
369
+ Shaykhʹzādah شيخ زاده
370
+
371
+ - |
372
+ Rule 22 As in the case of romanization from other languages, foreign words which occur in an Arabic context and are written in Arabic letters are romanized according to the rules for romanizing Arabic.
373
+ Jārmānūs (not Germanos nor Germanus) جارمانوس
374
+ Lūrd Ghrānfīl (not Lord Granville) لورد غرانفيل
375
+ Īsāghūjī (not Isagoge) ايساغوجي
376
+
377
+ For short vowels not indicated in the Arabic, the Arabic vowel nearest to the original pronunciation is supplied.
378
+
379
+ Gharsiyā Khayin (not García Jaén) غرسيا خين
380
+
381
+ # Examples of Irregular Arabic Orthography
382
+
383
+ - |
384
+ Rule 23 Note the romanization of الله, alone and in combination.
385
+
386
+ Allāh الله
387
+ billāh
388
+ lillāh
389
+ bismillāh بسم الله
390
+ al-Mustanṣir billāh
391
+
392
+ - |
393
+ Rule 24 Note the romanization of the following personal names:
394
+
395
+ Ṭāhā طه
396
+ Yāsīn يس ، يسن
397
+ ‘Amr عمرو
398
+ Bahjat بهجت ، بهجة
399
+
400
+ - |
401
+ Rule 25 ابن and بن are both romanized ibn in all positions.
402
+
403
+ Aḥmad ibn Muḥammad ibn Abī al-Rabī‘ احمد بن محمد بن ابي الربيع
404
+ Sharḥ Ibn ‘Aqīl ‘alá Alfīyat Ibn Mālik شرح ابن عقيل على الفية ابن مالك
405
+
406
+ Exception is made in the case of modern names, typically North African, in which the element بن is pronounced bin.
407
+
408
+ Bin Khiddah بن خده
409
+ Bin-‘Abd Allāh بنعبد الله
410
+
411
+ tests:
412
+ # From Rule 1 - part a
413
+ - source: وَضعْ
414
+ expected: waḍ‘
415
+ - source: عِوَضْ
416
+ expected: ‘iwaḍ
417
+ - source: دَلو
418
+ expected: dalw
419
+ - source: يَد
420
+ expected: yad
421
+ - source: حِيَل
422
+ expected: ḥiyal
423
+ - source: طَهي
424
+ expected: ṭahy
425
+
426
+ # From Rule 1 - part b
427
+ - source: أُولَى
428
+ expected: ūlá
429
+ - source: صُورَة
430
+ expected: ṣūrah
431
+ - source: ذُو
432
+ expected: dhū
433
+ - source: إيمَان
434
+ expected: īmān
435
+ - source: جِيْل
436
+ expected: jīl
437
+ - source: فِي
438
+ expected: fī
439
+ - source: كِتَاب
440
+ expected: kitāb
441
+ - source: سَحَاب
442
+ expected: saḥāb
443
+ - source: جُمَان
444
+ expected: jumān
445
+
446
+ # From Rule 1 - part c
447
+ - source: أوج
448
+ expected: awj
449
+ - source: نَوم
450
+ expected: nawm
451
+ - source: لَو
452
+ expected: law
453
+ - source: أيسَر
454
+ expected: aysar
455
+ - source: شَيخ
456
+ expected: shaykh
457
+ - source: عَينَي
458
+ expected: ‘aynay
459
+
460
+ # From Rule 4
461
+ - source: فَعَلُوا
462
+ expected: fa‘alū
463
+ # - source: أُولَائِكَ
464
+ # expected: ulā’ika
465
+ - source: أُوقِيَّة
466
+ expected: ūqīyah
467
+
468
+ # From Rule 5
469
+ - source: فَاعِل
470
+ expected: fā‘il
471
+ - source: رِضَا
472
+ expected: riḍā
473
+
474
+ # From Rule 6 - part a
475
+ - source: حَتَّى
476
+ expected: ḥattá
477
+ - source: مَضَى
478
+ expected: maḍá
479
+ - source: كُبرَى
480
+ expected: kubrá
481
+ - source: يَحيَى
482
+ expected: yaḥyá
483
+ - source: مُسَمَّى
484
+ expected: musammá
485
+ - source: مُصطَفَى
486
+ expected: muṣṭafá
487
+
488
+ # From Rule 6 - part b
489
+ - source: رَضِي الدِين
490
+ expected: raḍī al-dīn
491
+
492
+ # From Rule 6 - part c
493
+ - source: المِصرِيّ
494
+ expected: al-miṣrī
495
+
496
+ # From Rule 7 - part a
497
+ - source: صَلَاة
498
+ expected: ṣalāh
499
+ - source: الرِسَالَة البَهِيَّة
500
+ expected: al-risālah al-bahīyah
501
+ - source: مِرآة
502
+ expected: mir’āh
503
+ # - source: أرجوزة فى الطب
504
+ # expected: Urjūzah fī al-ṭibb
505
+
506
+ # From Rule 7 - part b
507
+ - source: وِزَارَة التَربِيَة
508
+ expected: wizārat al-tarbiyah
509
+ - source: مِرآة الزَمَان
510
+ expected: mir’āt al-zamān
511
+
512
+ # From Rule 8 - part a
513
+ - source: أَسَد
514
+ expected: asad
515
+ - source: أُنس
516
+ expected: uns
517
+ - source: إذَا
518
+ expected: idhā
519
+ - source: مَسأَلَة
520
+ expected: mas’alah
521
+ - source: مُؤتَمَر
522
+ expected: mu’tamar
523
+ - source: دَائِم
524
+ expected: dā’im
525
+ - source: مَلَأ
526
+ expected: mala’a
527
+ - source: خَطِئ
528
+ expected: khaṭi’a
529
+
530
+ # From Rule 9
531
+ - source: رِحلَة إبن جُبَير
532
+ expected: riḥlat ibn jubayr
533
+ - source: الإستِدرَاك
534
+ expected: al-istidrāk
535
+ # - source: كُتُب إقتَنَتهَا
536
+ # expected: kutub iqtanatʹhā # issue
537
+ # - source: باهتمام عبد ٱلمجيد
538
+ # expected: bi-ihtimām ‘Abd al-Majīd #issue
539
+
540
+ # From Rule 10 - part a
541
+ - source: آلَة
542
+ expected: ālah
543
+ - source: كُلِّيَّة الآدَاب
544
+ expected: kullīyat al-ādāb
545
+
546
+ # From Rule 10 - part b
547
+ - source: تَآلِيف
548
+ expected: ta’ālīf
549
+ - source: مَآثِر
550
+ expected: ma’āthir
551
+
552
+ # From Rule 10 - part c
553
+ - source: خُلَفَآء
554
+ expected: khulafā’
555
+
556
+ # From Rule 11 - part a-1
557
+ - source: عَدُوّ
558
+ expected: ‘adūw
559
+ - source: قُوَّة
560
+ expected: qūwah
561
+
562
+ # From Rule 11 - part a-2
563
+ - source: شَوَّال
564
+ expected: shawwāl
565
+ - source: صَوَّرَ
566
+ expected: ṣawwara
567
+ - source: جَوّ
568
+ expected: jaww
569
+
570
+ # From Rule 11 - part b-1
571
+ - source: المِصرِيَّة
572
+ expected: al-miṣrīyah
573
+
574
+ # From Rule 11 - part b-3
575
+ - source: أَيَّام
576
+ expected: ayyām
577
+ - source: سَيِّد
578
+ expected: sayyid
579
+ - source: قُصَيّ
580
+ expected: quṣayy
581
+
582
+ # From Rule 11 - part c
583
+ - source: الغَزِّيّ
584
+ expected: al-ghazzī
585
+ - source: الكَشَّاف
586
+ expected: al-kashshāf
587
+
588
+ # From Rule 12 - part a
589
+ - source: قَاضٍ
590
+ expected: qāḍin
591
+ - source: مَعنًى
592
+ expected: ma‘nan
593
+
594
+ # From Rule 12 - part b
595
+ - source: طَبعًا
596
+ expected: ṭab‘an
597
+ - source: فَجأَةً
598
+ expected: faj’atan
599
+ - source: المُشتَرِك وَضعاً
600
+ expected: al-mushtarik waḍ‘an
601
+ # - source: وَالمُفتَرِق صُقعاً #issue
602
+ # expected: wa-al-muftariq ṣuq‘an
603
+
604
+ # Grammar
605
+ # From Rule 13
606
+ - source: مَن وَلِيَ مِصر
607
+ expected: man waliya miṣr
608
+ # - source: مَعرِفَة مَا يَجِبُ لَهُم
609
+ # expected: ma‘rifat mā yajibu la-hum
610
+ # - source: صَلَّى اللَّه عَلَيهِ وسَلَّم # issue allah starting with al
611
+ # expected: ṣallá Allāh ‘alayhi wa-sallam
612
+ - source: اللُؤلُؤ المَكنُون فِي حُكم
613
+ expected: al-lu’lu’ al-maknūn fī ḥukm
614
+ # - source: الإخبَار عَمَّا سَيَكُون #issue sa-yakūn
615
+ # expected: al-ikhbār ‘ammā sa-yakūn
616
+
617
+ # From Rule 14 - part a
618
+ # - source: أُصُولَهَا النَفسِيَّة وَطُرُق تَدرِيسِهَا # issue wa-ṭuruq
619
+ # expected: uṣūluhā al-nafsīyah wa-ṭuruq tadrīsihā
620
+ - source: إلَى يَومِنَا هَذَا
621
+ expected: ilá yawminā hādhā
622
+
623
+ # From Rule 15 - part a
624
+ # - source: انا وانت # issue waw atf
625
+ # expected: anā wa-anta
626
+ - source: هَذِهِ الحَال
627
+ expected: hādhihi al-ḥāl
628
+ # - source: مُؤَلَّفَاتُهُ وَشُرُوحُهَا
629
+ # expected: mu’allafātuhu wa-shurūḥuhā
630
+
631
+ # From Rule 15 - part b
632
+ # - source: حياته وعصره
633
+ # expected: ḥayātuhu wa-‘aṣruh
634
+ - source: تَوفِيق الحَكِيم، أَفكَارُه، آثَارُه
635
+ expected: tawfīq al-ḥakīm, afkāruh, āthāruh
636
+
637
+ # From Rule 16 - part a
638
+ - source: أَنَّ
639
+ expected: anna
640
+ - source: أَنَّهُ
641
+ expected: annahu
642
+ - source: بَينَ يَدَيهِ
643
+ expected: bayna yadayhi
644
+
645
+ # From Rule 16 - part b
646
+ # - source: به
647
+ # expected: bi-hi
648
+ # - source: ومعه
649
+ # expected: wa-ma‘ahu
650
+ # - source: لاسلكي
651
+ # expected: lā-silkī
652
+
653
+ # From Rule 17 - part a
654
+ - source: الكِتَاب الثَانِي
655
+ expected: al-kitāb al-thānī
656
+ - source: الإتِّحَاد
657
+ expected: al-ittiḥād
658
+ - source: الأَصل
659
+ expected: al-aṣl
660
+ - source: الآثَار
661
+ expected: al-āthār
662
+
663
+ # From Rule 17 - part b
664
+ - source: إلَى الآن
665
+ expected: ilá al-ān
666
+ - source: ابُو الوَفَاء
667
+ expected: abū al-wafā’
668
+ - source: مَكتَبَة النَهضَة المِصرِيَّة
669
+ expected: maktabat al-nahḍah al-miṣrīyah
670
+ # - source: بالتمام والكمال
671
+ # expected: bi-al-tamām wa-al-kamāl
672
+ # - source: للشربيني
673
+ # expected: lil-Shirbīnī
674
+
675
+ # From Rule 17 - part c
676
+ - source: الحُرُوف الأَبجَدِيَّة
677
+ expected: al-ḥurūf al-abjadīyah
678
+ - source: ابُو اللَيث السَمَرقَندِي
679
+ expected: abū al-layth al-samarqandī
680
+
681
+ # From Rule 18 - part b
682
+ - source: الإيجِي
683
+ expected: al-ījī
684
+ - source: الآلُوسِي
685
+ expected: al-ālūsī
686
+
687
+ # From Rule 19
688
+ # - source: إبراهيم ، إبرهيم
689
+ # expected: Ibrāhīm
690
+ # - source: داؤود ، داؤد
691
+ # expected: Dā’ūd
692
+ # - source: ابو الحسن
693
+ # expected: Abū al-Ḥasan
694
+ - source: رُؤُوس
695
+ expected: ru’ūs
696
+ # - source: ذَلِكَ
697
+ # expected: dhālika
698
+ - source: عَلَى العَين
699
+ expected: ‘alá al-‘ayn
700
+
701
+ # # From Rule 21 - part a
702
+ # - source: أدهم
703
+ # expected: Adʹham
704
+ # - source: أكرمتها
705
+ # expected: akramatʹhā
706
+
707
+ # # From Rule 21 - part b
708
+ # - source: قلعه‌جى
709
+ # expected: Qal‘ahʹjī
710
+ # - source: شيخ زاده
711
+ # expected: Shaykhʹzādah
712
+
713
+ # From Rule 22
714
+ - source: جَارمَانُوس
715
+ expected: jārmānūs # not Germanos nor Germanus
716
+ - source: لُورد غرَانفِيل
717
+ expected: lūrd ghrānfīl # not Lord Granville
718
+ - source: إيسَاغُوجِي
719
+ expected: īsāghūjī # not Isagoge
720
+ # - source: غرسيا خين
721
+ # expected: Gharsiyā Khayin # not García Jaén
722
+
723
+ # From Rule 23
724
+ - source: اللَّه
725
+ expected: Allāh
726
+ # - source: بسم الله
727
+ # expected: bismillāh
728
+
729
+ # # From Rule 24
730
+ # - source: طه
731
+ # expected: Ṭāhā
732
+ # - source: يس ، يسن
733
+ # expected: Yāsīn
734
+ # - source: عمرو
735
+ # expected: ‘Amr
736
+ # - source: بهجت ، بهجة
737
+ # expected: Bahjat
738
+
739
+ # # From Rule 25
740
+ # - source: احمد بن محمد بن ابي الربيع
741
+ # expected: Aḥmad ibn Muḥammad ibn Abī al-Rabī‘
742
+ # - source: شرح ابن عقيل على الفية ابن مالك
743
+ # expected: Sharḥ Ibn ‘Aqīl ‘alá Alfīyat Ibn Mālik
744
+ # - source: بن خده
745
+ # expected: Bin Khiddah
746
+ # - source: بنعبد الله
747
+ # expected: Bin-‘Abd Allāh
748
+
749
+
750
+
751
+ map:
752
+ characters:
753
+
754
+
755
+ # pointing
756
+ '\u064e' : 'a' # َ fatha
757
+ # '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
758
+ # '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
759
+ '\u0650' : 'i' # ِ kasra
760
+ '\u064f' : 'u' # ُ damma
761
+ '\u0652' : '' # ْ sokoon, see note A below
762
+
763
+
764
+ # special pointed letters
765
+ '\u0639\u064e' : '‘a' # عَ
766
+ '\u0639\u0650' : '‘i' # عِ
767
+ '\u0639\u064f' : '‘ū' # عُ
768
+ # handle MacOS regex difference
769
+ '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
770
+
771
+ '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
772
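+ '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي when a fatha/damma follows (consonantal y); NOTE(review): the second alternative reads u064f without a backslash, presumably \u064f was intended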
773
+ '\u064f\u0648' : 'ū' # ـُو damma followed by و
774
+ '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
775
+ '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي
776
+ '\u064e\u0648\u0652' : 'aw' # ـَوْ
777
+ '\u064e\u064a\u0652' : 'ay' # ـَيْ
778
+
779
+
780
+ # ta' marboota
781
+ '\u0629' : 't' # ة in the middle of the sentence
782
+ '\u0629$' : 'h'
783
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'h'
784
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'h'
785
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'h'
786
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'h'
787
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'h'
788
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'h'
789
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'h'
790
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'h'
791
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'h'
792
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'h'
793
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'h'
794
+ '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'h'
795
+
796
+ # Rule 4
797
+ '\u064f\u0648\u0627' : 'ū' # وا
798
+
799
+ # Rule 8
800
+ '\u0621' : '’'
801
+ '\b\u0623' : '' # أ
802
+ '\u0623' : '’' # أ
803
+ '\u0623\b' : '’a' # أ
804
+ '\u0623\u0648' : 'aw' # أو
805
+ '\u0623\u064a' : 'ay' # أي
806
+ '\u0624': '’' # ؤ
807
+ '\u0625\u064a' : 'ī' # إي
808
+ '\u0625' : 'i' # إِ
809
+ '\u0626' : "’" # ئ
810
+ '\u0626\b' : "’a" # ئ
811
+ '\b\u0627' : 'a' # ا
812
+ '\u0627' : '' # ا
813
+
814
+ # Rule 10
815
+
816
+ '\b\u0622' : 'ā' # آ
817
+ '(?<!\b\u0627\u0644)(?<!\b)\u0622(?![\b|\u0621])' : '’ā' # آ in middle, not final, or initial, or after ال
818
+ '\u064e\u0622' : 'ā' # ـَآ fatha followed by آ
819
+ '\u0622' : '' # آ
820
+
821
+ # Rule 11 - shadda
822
+ '\u064f\u0648\u0651' : 'ūw' # ـُوّ damma followed by و with shadda
823
+ '\u064e\u0648\u0651' : 'aww' # ـَوّ fatha followed by و with shadda
824
+ '\u064e\u064a\u0651' : 'ayy' # ـَيّ fatha followed by ي with shadda
825
+ '\u0650\u064a\u0651' : 'īy' # ـِيَّ
826
+ '\u0650\u064a\u0651\b' : 'ī' # ـِيَّ
827
+
828
+ '\u0628\u0651' : 'bb' # ب
829
+ '\u062a\u0651' : 'tt' # ت
830
+ '\u062b\u0651' : 'thth' # ث
831
+ '\u062c\u0651' : 'jj' # ج
832
+ '\u062d\u0651' : 'ḥḥ' # ح
833
+ '\u062e\u0651' : 'khkh' # خ
834
+ '\u062f\u0651' : 'dd' # د
835
+ '\u0630\u0651' : 'dhdh' # ذ
836
+ '\u0631\u0651' : 'rr' # ر
837
+ '\u0632\u0651' : 'zz' # ز
838
+ '\u0633\u0651' : 'ss' # س
839
+ '\u0634\u0651' : 'shsh' # ش
840
+ '\u0635\u0651' : 'ṣṣ' # ص
841
+ '\u0636\u0651' : 'ḍḍ' # ض
842
+ '\u0637\u0651' : 'ṭṭ' # ط
843
+ '\u0638\u0651' : 'ẓẓ' # ظ
844
+ '\u063a\u0651' : 'ghgh' # غ
845
+ '\u0641\u0651' : 'ff' # ف
846
+ '\u0642\u0651' : 'qq' # ق
847
+ '\u0643\u0651' : 'kk' # ك
848
+ '\u0644\u0651' : 'll' # ل
849
+ '\u0645\u0651' : 'mm' # م
850
+ '\u0646\u0651' : 'nn' # ن
851
+ '\u0647\u0651' : 'hh' # ه
852
+ '\u0648\u0651' : 'ww' # و
853
+ '\u064a\u0651' : 'yy' # ي
854
+
855
+ # Rule 12 - tanwin
856
+
857
+ '\u064c': 'un' # ٌ
858
+ '\u064b': 'an' # ً
859
+ '\u064d': 'in' # ٍ
860
+ # tanwin belongs on the letter preceding a final ا or ى;
860
+ # however, it is commonly written after that letter instead, so both orders are handled
862
+ '\u064b\u0649': 'an' # ً
863
+ '\u064b\u0627': 'an' # ً
864
+ '\u0649\u064b': 'an' # ً
865
+ '\u0627\u064b': 'an' # ً
866
+
867
+ # Rule 13
868
+ '\u0647\u064e' : 'hā' # ه
869
+
870
+ '\u060c': ',' # ،
871
+
872
+ '\u0627\u0644\u0644\u0651\u064e\u0647': "Allāh"
873
+
874
+ '\b\u0627\u0644' : 'al-' # ال
875
+ # '\uFE8E' : '' # ﺎ
876
+
877
+ '\u0628' : 'b' # ب
878
+ '\uFE91' : 'b' # ﺑ
879
+ '\uFE92' : 'b' # ﺒ
880
+ '\uFE90' : 'b' # ﺐ
881
+
882
+ '\u062a' : 't' # ت
883
+ '\ufe97' : 't' # ﺗ
884
+ '\ufe98' : 't' # ﺘ
885
+ '\ufe96' : 't' # ﺖ
886
+
887
+ '\u062b' : 'th' # ث
888
+ '\ufe9b' : 'th' # ﺛ
889
+ '\ufe9c' : 'th' # ﺜ
890
+ '\ufe9a' : 'th' # ﺚ
891
+
892
+ '\u062c' : 'j' # ج
893
+ '\ufe9f' : 'j' # ﺟ
894
+ '\ufea0' : 'j' # ﺠ
895
+ '\ufe9e' : 'j' # ﺞ
896
+
897
+ '\u062d' : 'ḥ' # ح
898
+ '\ufea3' : 'ḥ' # ﺣ
899
+ '\ufea4' : 'ḥ' # ﺤ
900
+ '\ufea2' : 'ḥ' # ﺢ
901
+
902
+ '\u062e' : 'kh' # خ
903
+ '\ufea7' : 'kh' # ﺧ
904
+ '\ufea8' : 'kh' # ﺨ
905
+ '\ufea6' : 'kh' # ﺦ
906
+
907
+ '\u062f' : 'd' # د
908
+ '\ufeaa' : 'd' # ﺪ
909
+ # Initial
910
+ '\ufea3': 'ḥ' # ﺣ
911
+ '\ufebb': 'ṣ' # ﺻ
912
+ '\ufebf': 'ḍ' # ﺿ
913
+ '\ufec3': 'ṭ' # ﻃ
914
+ '\ufec7': 'ẓ' # ﻇ
915
+
916
+ '\u0630' : 'dh' # ذ
917
+ '\ufeac' : 'dh' # ﺬ
918
+
919
+ '\u0631' : 'r' # ر
920
+ '\ufeae' : 'r' # ﺮ
921
+
922
+ '\u0632' : 'z' # ز
923
+ '\ufeb0' : 'z' # ﺰ
924
+
925
+ '\u0633' : 's' # س
926
+ '\ufeb3' : 's' # ﺳ
927
+ '\ufeb4' : 's' # ﺴ
928
+ '\ufeb2' : 's' # ﺲ
929
+
930
+ '\u0634' : 'sh' # ش
931
+ '\ufeb7' : 'sh' # ﺷ
932
+ '\ufeb8' : 'sh' # ﺸ
933
+ '\ufeb6' : 'sh' # ﺶ
934
+
935
+ '\u0635' : 'ṣ' # ص
936
+ '\ufebb' : 'ṣ' # ﺻ
937
+ '\ufebc' : 'ṣ' # ﺼ
938
+ '\ufeba' : 'ṣ' # ﺺ
939
+
940
+ '\u0636' : 'ḍ' # ض
941
+ '\ufebf' : 'ḍ' # ﺿ
942
+ '\ufec0' : 'ḍ' # ﻀ
943
+ '\ufebe' : 'ḍ' # ﺾ
944
+
945
+ '\u0637' : 'ṭ' # ط
946
+ '\ufec3' : 'ṭ' # ﻃ
947
+ '\ufec4' : 'ṭ' # ﻄ
948
+ '\ufec2' : 'ṭ' # ﻂ
949
+
950
+ '\u0638' : 'ẓ' # ظ
951
+ '\ufec7' : 'ẓ' # ﻇ
952
+ '\ufec8' : 'ẓ' # ﻈ
953
+ '\ufec6' : 'ẓ' # ﻆ
954
+
955
+ '\u0639' : '‘' # ع
956
+ '\ufecb' : '‘' # ﻋ
957
+ '\ufecc' : '‘' # ﻌ
958
+ '\ufeca' : '‘' # ﻊ
959
+
960
+ '\u063a' : 'gh' # غ
961
+ '\ufecf' : 'gh' # ﻏ
962
+ '\ufed0' : 'gh' # ﻐ
963
+ '\ufece' : 'gh' # ﻎ
964
+
965
+ '\u0641' : 'f' # ف
966
+ '\ufed3' : 'f' # ﻓ
967
+ '\ufed4' : 'f' # ﻔ
968
+ '\ufed2' : 'f' # ﻒ
969
+
970
+ '\u0642' : 'q' # ق
971
+ '\ufed7' : 'q' # ﻗ
972
+ '\ufed8' : 'q' # ﻘ
973
+ '\ufed6' : 'q' # ﻖ
974
+
975
+ '\u0643' : 'k' # ك
976
+ '\ufedb' : 'k' # ﻛ
977
+ '\ufedc' : 'k' # ﻜ
978
+ '\ufeda' : 'k' # ﻚ
979
+
980
+ '\u0644' : 'l' # ل
981
+ '\ufedf' : 'l' # ﻟ
982
+ '\ufee0' : 'l' # ﻠ
983
+ '\ufede' : 'l' # ﻞ
984
+
985
+ '\u0645' : 'm' # م
986
+ '\ufee3' : 'm' # ﻣ
987
+ '\ufee4' : 'm' # ﻤ
988
+ '\ufee2' : 'm' # ﻢ
989
+
990
+ '\u0646' : 'n' # ن
991
+ '\ufee7' : 'n' # ﻧ
992
+ '\ufee8' : 'n' # ﻨ
993
+ '\ufee6' : 'n' # ﻦ
994
+
995
+ # See note C
996
+ '\u0647' : 'h' # ه
997
+ '\ufeeb' : 'h' # ﻫ
998
+ '\ufeec' : 'h' # ﻬ
999
+ '\ufeea' : 'h' # ﻪ
1000
+
1001
+ '\u0648' : 'w' # و
1002
+ '\ufeee' : 'w' # ﻮ
1003
+
1004
+ '\u064a' : 'y' # ي
1005
+ '\ufef3' : 'y' # ﻳ
1006
+ '\ufef4' : 'y' # ﻴ
1007
+ '\ufef1' : 'y' # ﻱ
1008
+
1009
+ # (A) Not romanized word-initially.
1010
+
1011
+ # (B) Not romanized, but see romanizations accompanying alif (ا) in the table for vowels.
1012
+
1013
+ # (C) In certain endings, an original tā’ (ت) is written ة, i.e., like hā’ (ه) with two dots, and is known as tā’ marbūṭah. It is romanized h, except in the construct form of feminine nouns, where it is romanized t, instead.
1014
+
1015
+
1016
+ # Vowels, diphthongs and diacritical marks
1017
+ # (ـ stands for any consonant)
1018
+
1019
+
1020
+ # Vowels and Diphthongs
1021
+ '\u064e': 'a'
1022
+ '\u064f': 'u'
1023
+ '\u0650': 'i'
1024
+ '\u064e\u0627': 'ā' # see Rule 5
1025
+ '\ufeef \u064e': 'á' # see Rule 6(a)
1026
+ '\ufeed \u064f': 'ū'
1027
+ '\ufeef \u0650': 'ī'
1028
+ '\ufeed\u0652 \u064e': 'aw'
1029
+ '\ufeef\u0652 \u064e': 'ay'
1030
+
1031
+ # Letters Representing Non-Arabic Consonants
1032
+ # (this list is not exhaustive)
1033
+ '\u06af': 'g' # گ
1034
+ '\u06b4': 'ñ' # ڴ
1035
+ '\u067e': 'p' # پ
1036
+ '\u0686':
1037
+ - 'ch' # چ
1038
+ - 'zh'
1039
+ '\u0698': 'zh' # ژ
1040
+ '\u06a4': 'v' # ڤ
1041
+ '\u06cb': 'v' # ۋ
1042
+ '\u06a5': 'v' # ڥ
1043
+
1044
+ # Arabic standard Unicode block
1045
+ '\u0600': '' # ؀
1046
+ '\u0601': '' # ؁
1047
+ '\u0602': '' # ؂
1048
+ '\u0603': '' # ؃
1049
+ '\u0604': '' # ؄
1050
+ '\u0605': '' # ؅
1051
+ '\u0606': '' # ؆
1052
+ '\u0607': '' # ؇
1053
+ '\u0608': '' # ؈
1054
+ '\u0609': '' # ؉
1055
+ '\u060a': '' # ؊
1056
+ '\u060b': '' # ؋
1057
+ '\u060d': '' # ؍
1058
+ '\u060e': '' # ؎
1059
+ '\u060f': '' # ؏
1060
+ '\u0610': '' # ؐ
1061
+ '\u0611': '' # ؑ
1062
+ '\u0612': '' # ؒ
1063
+ '\u0613': '' # ؓ
1064
+ '\u0614': '' # ؔ
1065
+ '\u0615': '' # ؕ
1066
+ '\u0616': '' # ؖ
1067
+ '\u0617': '' # ؗ
1068
+ '\u0618': '' # ؘ
1069
+ '\u0619': '' # ؙ
1070
+ '\u061a': '' # ؚ
1071
+ '\u061b': '' # ؛
1072
+ '\u061c': '' #
1073
+ '\u061d': '' #
1074
+ '\u061e': '' # ؞
1075
+ '\u061f': '' # ؟
1076
+ '\u0620': '' # ؠ
1077
+ '\u0628': 'b' # ب
1078
+ # '\u0629': 'h' # ة -- see Note 3
1079
+ '\u062a': 't' # ت
1080
+ '\u062b': 'th' # ث
1081
+ '\u062c': 'j' # ج
1082
+ '\u062d': 'ḥ' # ح
1083
+ '\u062e': 'kh' # خ
1084
+ '\u062f': 'd' # د
1085
+ '\u0630': 'dh' # ذ
1086
+ '\u0631': 'r' # ر
1087
+ '\u0632': 'z' # ز
1088
+ '\u0633': 's' # س
1089
+ '\u0634': 'sh' # ش
1090
+ '\u0635': 'ṣ' # ص
1091
+ '\u0636': 'ḍ' # ض
1092
+ '\u0637': 'ṭ' # ط
1093
+ '\u0638': 'ẓ' # ظ
1094
+ '\u0639': '‘' # ع
1095
+ '\u063a': 'gh' # غ
1096
+ '\u063b': '' # ػ
1097
+ '\u063c': '' # ؼ
1098
+ '\u063d': '' # ؽ
1099
+ '\u063e': '' # ؾ
1100
+ '\u063f': '' # ؿ
1101
+ '\u0640': '' # ـ
1102
+ '\u0641': 'f' # ف -- see Note 2
1103
+ '\u0642': 'q' # ق -- see Note 2
1104
+ '\u0643': 'k' # ك
1105
+ '\u0644': 'l' # ل
1106
+ '\u0645': 'm' # م
1107
+ '\u0646': 'n' # ن
1108
+ # '\u0647': 'h' # ه -- see Note 3
1109
+ '\u0648': 'w' # و
1110
+ # '\u064a': 'y' # ي
1111
+ # '\u064e': '' # َ
1112
+ # '\u064f': '' # ُ
1113
+ # '\u0650': '' # ِ
1114
+ # '\u0652': '' # ْ
1115
+ # '\u0653': '' # ٓ
1116
+ # '\u0654': '' # ٔ
1117
+ # '\u0655': '' # ٕ
1118
+ # '\u0656': '' # ٖ
1119
+ # '\u0657': '' # ٗ
1120
+ # '\u0658': '' # ٘
1121
+ # '\u0659': '' # ٙ
1122
+ # '\u065a': '' # ٚ
1123
+ # '\u065b': '' # ٛ
1124
+ # '\u065c': '' # ٜ
1125
+ # '\u065d': '' # ٝ
1126
+ # '\u065e': '' # ٞ
1127
+ # '\u065f': '' # ٟ
1128
+ # '\u0660': '' # ٠
1129
+ '\u0661': '' # ١
1130
+ '\u0662': '' # ٢
1131
+ '\u0663': '' # ٣
1132
+ '\u0664': '' # ٤
1133
+ '\u0665': '' # ٥
1134
+ '\u0666': '' # ٦
1135
+ '\u0667': '' # ٧
1136
+ '\u0668': '' # ٨
1137
+ '\u0669': '' # ٩
1138
+ '\u066a': '' # ٪
1139
+ '\u066b': '' # ٫
1140
+ '\u066c': '' # ٬
1141
+ '\u066d': '' # ٭
1142
+ '\u066e': '' # ٮ
1143
+ '\u066f': '' # ٯ
1144
+ '\u0670': '' # ٰ
1145
+ '\u0671': '' # ٱ
1146
+ '\u0672': '' # ٲ
1147
+ '\u0673': '' # ٳ
1148
+ '\u0674': '' # ٴ
1149
+ '\u0675': '' # ٵ
1150
+ '\u0676': '' # ٶ
1151
+ '\u0677': '' # ٷ
1152
+ '\u0678': '' # ٸ
1153
+ '\u0679': '' # ٹ
1154
+ '\u067a': '' # ٺ
1155
+ '\u067b': '' # ٻ
1156
+ '\u067c': '' # ټ
1157
+ '\u067d': '' # ٽ
1158
+ # '\u067e': 'p' # پ
1159
+ '\u067f': '' # ٿ
1160
+ '\u0680': '' # ڀ
1161
+ '\u0681': '' # ځ
1162
+ '\u0682': '' # ڂ
1163
+ '\u0683': '' # ڃ
1164
+ '\u0684': '' # ڄ
1165
+ '\u0685': '' # څ
1166
+ # '\u0686': 'ch' # چ
1167
+ '\u0687': '' # ڇ
1168
+ '\u0688': '' # ڈ
1169
+ '\u0689': '' # ډ
1170
+ '\u068a': '' # ڊ
1171
+ '\u068b': '' # ڋ
1172
+ '\u068c': '' # ڌ
1173
+ '\u068d': '' # ڍ
1174
+ '\u068e': '' # ڎ
1175
+ '\u068f': '' # ڏ
1176
+ '\u0690': '' # ڐ
1177
+ '\u0691': '' # ڑ
1178
+ '\u0692': '' # ڒ
1179
+ '\u0693': '' # ړ
1180
+ '\u0694': '' # ڔ
1181
+ '\u0695': '' # ڕ
1182
+ '\u0696': '' # ږ
1183
+ '\u0697': '' # ڗ
1184
+ # '\u0698': 'zh' # ژ
1185
+ '\u0699': '' # ڙ
1186
+ '\u069a': '' # ښ
1187
+ '\u069b': '' # ڛ
1188
+ '\u069c': '' # ڜ
1189
+ '\u069d': '' # ڝ
1190
+ '\u069e': '' # ڞ
1191
+ '\u069f': '' # ڟ
1192
+ '\u06a0': '' # ڠ
1193
+ '\u06a1': '' # ڡ
1194
+ '\u06a2': '' # ڢ
1195
+ '\u06a3': '' # ڣ
1196
+ # '\u06a4': 'v' # ڤ
1197
+ # '\u06a5': 'v' # ڥ
1198
+ '\u06a6': '' # ڦ
1199
+ '\u06a7': '' # ڧ
1200
+ '\u06a8': '' # ڨ
1201
+ '\u06a9': '' # ک
1202
+ '\u06aa': '' # ڪ
1203
+ '\u06ab': '' # ګ
1204
+ '\u06ac': '' # ڬ
1205
+ '\u06ad': '' # ڭ
1206
+ '\u06ae': '' # ڮ
1207
+ # '\u06af': 'g' # گ
1208
+ '\u06b0': '' # ڰ
1209
+ '\u06b1': '' # ڱ
1210
+ '\u06b2': '' # ڲ
1211
+ '\u06b3': '' # ڳ
1212
+ # '\u06b4': 'ñ' # ڴ
1213
+ '\u06b5': '' # ڵ
1214
+ '\u06b6': '' # ڶ
1215
+ '\u06b7': '' # ڷ
1216
+ '\u06b8': '' # ڸ
1217
+ '\u06b9': '' # ڹ
1218
+ '\u06ba': '' # ں
1219
+ '\u06bb': '' # ڻ
1220
+ '\u06bc': '' # ڼ
1221
+ '\u06bd': '' # ڽ
1222
+ '\u06be': '' # ھ
1223
+ '\u06bf': '' # ڿ
1224
+ '\u06c0': '' # ۀ
1225
+ '\u06c1': '' # ہ
1226
+ '\u06c2': '' # ۂ
1227
+ '\u06c3': '' # ۃ
1228
+ '\u06c4': '' # ۄ
1229
+ '\u06c5': '' # ۅ
1230
+ '\u06c6': '' # ۆ
1231
+ '\u06c7': '' # ۇ
1232
+ '\u06c8': '' # ۈ
1233
+ '\u06c9': '' # ۉ
1234
+ '\u06ca': '' # ۊ
1235
+ # '\u06cb': 'v' # ۋ
1236
+ '\u06cc': '' # ی
1237
+ '\u06cd': '' # ۍ
1238
+ '\u06ce': '' # ێ
1239
+ '\u06cf': '' # ۏ
1240
+ '\u06d0': '' # ې
1241
+ '\u06d1': '' # ۑ
1242
+ '\u06d2': '' # ے
1243
+ '\u06d3': '' # ۓ
1244
+ '\u06d4': '' # ۔
1245
+ '\u06d5': '' # ە
1246
+ '\u06d6': '' # ۖ
1247
+ '\u06d7': '' # ۗ
1248
+ '\u06d8': '' # ۘ
1249
+ '\u06d9': '' # ۙ
1250
+ '\u06da': '' # ۚ
1251
+ '\u06db': '' # ۛ
1252
+ '\u06dc': '' # ۜ
1253
+ '\u06dd': '' # ۝
1254
+ '\u06de': '' # ۞
1255
+ '\u06df': '' # ۟
1256
+ '\u06e0': '' # ۠
1257
+ '\u06e1': '' # ۡ
1258
+ '\u06e2': '' # ۢ
1259
+ '\u06e3': '' # ۣ
1260
+ '\u06e4': '' # ۤ
1261
+ '\u06e5': '' # ۥ
1262
+ '\u06e6': '' # ۦ
1263
+ '\u06e7': '' # ۧ
1264
+ '\u06e8': '' # ۨ
1265
+ '\u06e9': '' # ۩
1266
+ '\u06ea': '' # ۪
1267
+ '\u06eb': '' # ۫
1268
+ '\u06ec': '' # ۬
1269
+ '\u06ed': '' # ۭ
1270
+ '\u06ee': '' # ۮ
1271
+ '\u06ef': '' # ۯ
1272
+ '\u06f0': '' # ۰
1273
+ '\u06f1': '' # ۱
1274
+ '\u06f2': '' # ۲
1275
+ '\u06f3': '' # ۳
1276
+ '\u06f4': '' # ۴
1277
+ '\u06f5': '' # ۵
1278
+ '\u06f6': '' # ۶
1279
+ '\u06f7': '' # ۷
1280
+ '\u06f8': '' # ۸
1281
+ '\u06f9': '' # ۹
1282
+ '\u06fa': '' # ۺ
1283
+ '\u06fb': '' # ۻ
1284
+ '\u06fc': '' # ۼ
1285
+ '\u06fd': '' # ۽
1286
+ '\u06fe': '' # ۾
1287
+ '\u06ff': '' # ۿ