interscript 0.1.9 → 2.0.5

Sign up to get free protection for your applications and to get access to all the features.
Files changed (352) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +11 -0
  3. data/.rspec +3 -0
  4. data/Gemfile +29 -0
  5. data/LICENSE.adoc +31 -0
  6. data/README.md +3 -0
  7. data/Rakefile +53 -0
  8. data/bin/console +14 -0
  9. data/bin/interscript +5 -0
  10. data/bin/maps_analyze_staging +168 -0
  11. data/bin/maps_debug_compilers +58 -0
  12. data/bin/maps_debug_ordering +88 -0
  13. data/bin/maps_debug_ruby_compile +24 -0
  14. data/bin/maps_debug_step_by_step +44 -0
  15. data/bin/maps_optimize_order +112 -0
  16. data/bin/maps_v1_analyze_regexps +45 -0
  17. data/bin/maps_v1_to_v2 +426 -0
  18. data/bin/setup +8 -0
  19. data/exe/interscript +6 -0
  20. data/interscript.gemspec +31 -0
  21. data/lib/interscript.rb +80 -135
  22. data/lib/interscript/command.rb +5 -5
  23. data/lib/interscript/compiler.rb +22 -0
  24. data/lib/interscript/compiler/javascript.rb +292 -0
  25. data/lib/interscript/compiler/ruby.rb +262 -0
  26. data/lib/interscript/dsl.rb +67 -0
  27. data/lib/interscript/dsl/aliases.rb +23 -0
  28. data/lib/interscript/dsl/document.rb +46 -0
  29. data/lib/interscript/dsl/group.rb +45 -0
  30. data/lib/interscript/dsl/group/parallel.rb +6 -0
  31. data/lib/interscript/dsl/items.rb +89 -0
  32. data/lib/interscript/dsl/metadata.rb +26 -0
  33. data/lib/interscript/dsl/stage.rb +6 -0
  34. data/lib/interscript/dsl/symbol_mm.rb +11 -0
  35. data/lib/interscript/dsl/tests.rb +12 -0
  36. data/lib/interscript/interpreter.rb +251 -0
  37. data/lib/interscript/node.rb +25 -0
  38. data/lib/interscript/node/alias_def.rb +15 -0
  39. data/lib/interscript/node/dependency.rb +13 -0
  40. data/lib/interscript/node/document.rb +45 -0
  41. data/lib/interscript/node/group.rb +34 -0
  42. data/lib/interscript/node/group/parallel.rb +9 -0
  43. data/lib/interscript/node/group/sequential.rb +2 -0
  44. data/lib/interscript/node/item.rb +52 -0
  45. data/lib/interscript/node/item/alias.rb +42 -0
  46. data/lib/interscript/node/item/any.rb +61 -0
  47. data/lib/interscript/node/item/capture.rb +50 -0
  48. data/lib/interscript/node/item/group.rb +51 -0
  49. data/lib/interscript/node/item/repeat.rb +40 -0
  50. data/lib/interscript/node/item/stage.rb +23 -0
  51. data/lib/interscript/node/item/string.rb +51 -0
  52. data/lib/interscript/node/metadata.rb +18 -0
  53. data/lib/interscript/node/rule.rb +6 -0
  54. data/lib/interscript/node/rule/funcall.rb +18 -0
  55. data/lib/interscript/node/rule/run.rb +15 -0
  56. data/lib/interscript/node/rule/sub.rb +65 -0
  57. data/lib/interscript/node/stage.rb +19 -0
  58. data/lib/interscript/node/tests.rb +15 -0
  59. data/lib/interscript/stdlib.rb +211 -0
  60. data/lib/interscript/utils/regexp_converter.rb +283 -0
  61. data/lib/interscript/version.rb +1 -1
  62. data/requirements.txt +1 -0
  63. metadata +73 -458
  64. data/README.adoc +0 -296
  65. data/aliases.json +0 -1
  66. data/lib/g2pwrapper.py +0 -34
  67. data/lib/interscript/fs.rb +0 -96
  68. data/lib/interscript/mapping.rb +0 -144
  69. data/lib/interscript/opal.rb +0 -196
  70. data/lib/interscript/opal/entrypoint.rb +0 -20
  71. data/lib/interscript/opal/exports.rb +0 -11
  72. data/lib/interscript/opal/maps.js.erb +0 -8
  73. data/lib/model-7 +0 -0
  74. data/lib/tha-pt-b-7 +0 -0
  75. data/maps/acadsin-zho-Hani-Latn-2002.yaml +0 -38916
  76. data/maps/alalc-amh-Ethi-Latn-1997.yaml +0 -513
  77. data/maps/alalc-amh-Ethi-Latn-2011.yaml +0 -138
  78. data/maps/alalc-ara-Arab-Latn-1997.yaml +0 -1287
  79. data/maps/alalc-asm-Deva-Latn-1997.yaml +0 -259
  80. data/maps/alalc-asm-Deva-Latn-2012.yaml +0 -55
  81. data/maps/alalc-aze-Arab-Latn-1997.yaml +0 -376
  82. data/maps/alalc-aze-Cyrl-Latn-1997.yaml +0 -145
  83. data/maps/alalc-bel-Cyrl-Latn-1997.yaml +0 -129
  84. data/maps/alalc-ben-Beng-Latn-1997.yaml +0 -291
  85. data/maps/alalc-ben-Beng-Latn-2017.yaml +0 -130
  86. data/maps/alalc-bul-Cyrl-Latn-1997.yaml +0 -98
  87. data/maps/alalc-div-Thaa-Latn-1997.yaml +0 -211
  88. data/maps/alalc-ell-Grek-Latn-1997.yaml +0 -628
  89. data/maps/alalc-ell-Grek-Latn-2010.yaml +0 -626
  90. data/maps/alalc-guj-Gujr-Latn-1997.yaml +0 -266
  91. data/maps/alalc-guj-Gujr-Latn-2011.yaml +0 -64
  92. data/maps/alalc-hin-Deva-Latn-1997.yaml +0 -303
  93. data/maps/alalc-hin-Deva-Latn-2011.yaml +0 -65
  94. data/maps/alalc-kan-Kana-Latn-1997.yaml +0 -274
  95. data/maps/alalc-kan-Kana-Latn-2011.yaml +0 -63
  96. data/maps/alalc-kat-Geok-Latn-1997.yaml +0 -111
  97. data/maps/alalc-kat-Geor-Latn-1997.yaml +0 -150
  98. data/maps/alalc-kor-Hang-Latn-1997.yaml +0 -98
  99. data/maps/alalc-mal-Mlym-Latn-1997.yaml +0 -303
  100. data/maps/alalc-mal-Mlym-Latn-2012.yaml +0 -73
  101. data/maps/alalc-mar-Deva-Latn-1997.yaml +0 -189
  102. data/maps/alalc-mar-Deva-Latn-2011.yaml +0 -45
  103. data/maps/alalc-mkd-Cyrl-Latn-1997.yaml +0 -114
  104. data/maps/alalc-mkd-Cyrl-Latn-2013.yaml +0 -103
  105. data/maps/alalc-mon-Cyrl-Latn-1997.yaml +0 -220
  106. data/maps/alalc-ori-Orya-Latn-1997.yaml +0 -284
  107. data/maps/alalc-ori-Orya-Latn-2011.yaml +0 -67
  108. data/maps/alalc-pan-Guru-Latn-1997.yaml +0 -256
  109. data/maps/alalc-pan-Guru-Latn-2011.yaml +0 -78
  110. data/maps/alalc-per-Arab-Latn-1997.yaml +0 -375
  111. data/maps/alalc-pli-Deva-Latn-2012.yaml +0 -144
  112. data/maps/alalc-pra-Deva-Latn-2012.yaml +0 -47
  113. data/maps/alalc-rus-Cyrl-Latn-1997.yaml +0 -225
  114. data/maps/alalc-rus-Cyrl-Latn-2012.yaml +0 -162
  115. data/maps/alalc-san-Deva-Latn-2012.yaml +0 -241
  116. data/maps/alalc-sin-Sinh-Latn-1997.yaml +0 -292
  117. data/maps/alalc-sin-Sinh-Latn-2011.yaml +0 -71
  118. data/maps/alalc-srp-Cyrl-Latn-1997.yaml +0 -118
  119. data/maps/alalc-srp-Cyrl-Latn-2013.yaml +0 -135
  120. data/maps/alalc-tam-Taml-Latn-1997.yaml +0 -62
  121. data/maps/alalc-tam-Taml-Latn-2011.yaml +0 -58
  122. data/maps/alalc-tel-Telu-Latn-1997.yaml +0 -284
  123. data/maps/alalc-tel-Telu-Latn-2011.yaml +0 -64
  124. data/maps/alalc-ukr-Cyrl-Latn-1997.yaml +0 -145
  125. data/maps/alalc-ukr-Cyrl-Latn-2011.yaml +0 -16
  126. data/maps/apcbg-bul-Cyrl-Latn-1995.yaml +0 -283
  127. data/maps/az-aze-Cyrl-Latn-1939.yaml +0 -105
  128. data/maps/az-aze-Cyrl-Latn-1958.yaml +0 -45
  129. data/maps/bas-rus-Cyrl-Latn-2017-bss.yaml +0 -174
  130. data/maps/bas-rus-Cyrl-Latn-2017-oss.yaml +0 -169
  131. data/maps/bgn-jpn-Hrkt-Latn-1962.yaml +0 -292
  132. data/maps/bgn-kor-Hang-Latn-1943.yaml +0 -35
  133. data/maps/bgn-kor-Kore-Latn-1943.yaml +0 -31
  134. data/maps/bgna-bul-Cyrl-Latn-2006.yaml +0 -208
  135. data/maps/bgna-bul-Cyrl-Latn-2009.yaml +0 -208
  136. data/maps/bgnpcgn-amh-Ethi-Latn-1967.yaml +0 -532
  137. data/maps/bgnpcgn-ara-Arab-Latn-1956.yaml +0 -598
  138. data/maps/bgnpcgn-arm-Armn-Latn-1981.yaml +0 -108
  139. data/maps/bgnpcgn-aze-Cyrl-Latn-1993.yaml +0 -111
  140. data/maps/bgnpcgn-bak-Cyrl-Latn-2007.yaml +0 -188
  141. data/maps/bgnpcgn-bal-Arab-Latn-2008.yaml +0 -329
  142. data/maps/bgnpcgn-bel-Cyrl-Latn-1979.yaml +0 -289
  143. data/maps/bgnpcgn-bul-Cyrl-Latn-1952.yaml +0 -119
  144. data/maps/bgnpcgn-bul-Cyrl-Latn-2013.yaml +0 -42
  145. data/maps/bgnpcgn-che-Cyrl-Latn-2008.yaml +0 -184
  146. data/maps/bgnpcgn-div-Thaa-Latn-1988.yaml +0 -75
  147. data/maps/bgnpcgn-ell-Grek-Latn-1962.yaml +0 -705
  148. data/maps/bgnpcgn-ell-Grek-Latn-1996.yaml +0 -23
  149. data/maps/bgnpcgn-far-Latn-Latn-1964.yaml +0 -28
  150. data/maps/bgnpcgn-fas-Arab-Latn-1956.yaml +0 -96
  151. data/maps/bgnpcgn-isl-Latn-Latn-1964.yaml +0 -37
  152. data/maps/bgnpcgn-jpn-Hrkt-Latn-1976.yaml +0 -257
  153. data/maps/bgnpcgn-kat-Geor-Latn-1981.yaml +0 -131
  154. data/maps/bgnpcgn-kat-Geor-Latn-2009.yaml +0 -42
  155. data/maps/bgnpcgn-kaz-Cyrl-Latn-1979.yaml +0 -247
  156. data/maps/bgnpcgn-kir-Cyrl-Latn-1979.yaml +0 -218
  157. data/maps/bgnpcgn-kor-Hang-Latn-kn-1945.yaml +0 -253
  158. data/maps/bgnpcgn-kor-Hang-Latn-rok-2011.yaml +0 -48
  159. data/maps/bgnpcgn-kor-Kore-Latn-rok-2011.yaml +0 -48
  160. data/maps/bgnpcgn-kur-Arab-Latn-2007.yaml +0 -249
  161. data/maps/bgnpcgn-mkd-Cyrl-Latn-1981.yaml +0 -163
  162. data/maps/bgnpcgn-mkd-Cyrl-Latn-2013.yaml +0 -190
  163. data/maps/bgnpcgn-mon-Cyrl-Latn-1964.yaml +0 -223
  164. data/maps/bgnpcgn-nep-Deva-Latn-2011.yaml +0 -230
  165. data/maps/bgnpcgn-per-Arab-Latn-1958.yaml +0 -338
  166. data/maps/bgnpcgn-prs-Arab-Latn-2007.yaml +0 -673
  167. data/maps/bgnpcgn-prs-Arab-Latn-yaghoubi.yaml +0 -459
  168. data/maps/bgnpcgn-pus-Arab-Latn-1968.yaml +0 -377
  169. data/maps/bgnpcgn-rue-Cyrl-Latn-2016.yaml +0 -168
  170. data/maps/bgnpcgn-rus-Cyrl-Latn-1947.yaml +0 -318
  171. data/maps/bgnpcgn-srp-Cyrl-Latn-1962.yaml +0 -73
  172. data/maps/bgnpcgn-srp-Cyrl-Latn-2005.yaml +0 -170
  173. data/maps/bgnpcgn-tat-Cyrl-Latn-2007.yaml +0 -220
  174. data/maps/bgnpcgn-tgk-Cyrl-Latn-1994.yaml +0 -240
  175. data/maps/bgnpcgn-ukr-Cyrl-Latn-1965.yaml +0 -166
  176. data/maps/bgnpcgn-ukr-Cyrl-Latn-2019.yaml +0 -119
  177. data/maps/bgnpcgn-urd-Arab-Latn-2007.yaml +0 -459
  178. data/maps/bgnpcgn-uzb-Cyrl-Latn-1979.yaml +0 -127
  179. data/maps/bgnpcgn-uzb-Cyrl-Latn-2000.yaml +0 -82
  180. data/maps/bgnpcgn-zho-Hans-Latn-1979.yaml +0 -7456
  181. data/maps/bis-asm-Beng-Latn-13194-1991.yaml +0 -159
  182. data/maps/bis-ben-Beng-Latn-13194-1991.yaml +0 -156
  183. data/maps/bis-dev-Deva-Latn-13194-1991.yaml +0 -184
  184. data/maps/bis-guj-Gujr-Latn-13194-1991.yaml +0 -181
  185. data/maps/bis-kan-Kana-Latn-13194-1991.yaml +0 -173
  186. data/maps/bis-mlm-Mlym-Latn-13194-1991.yaml +0 -176
  187. data/maps/bis-ori-Orya-Latn-13194-1991.yaml +0 -175
  188. data/maps/bis-pnj-Guru-Latn-13194-1991.yaml +0 -175
  189. data/maps/bis-tel-Telu-Latn-13194-1991.yaml +0 -170
  190. data/maps/bis-tml-Taml-Latn-13194-1991.yaml +0 -155
  191. data/maps/by-bel-Cyrl-Latn-1998.yaml +0 -172
  192. data/maps/by-bel-Cyrl-Latn-2007.yaml +0 -115
  193. data/maps/din-grc-Grek-Latn-31634-2011-t1.yaml +0 -899
  194. data/maps/din-hin-Deva-Latn-33904-2018.yaml +0 -100
  195. data/maps/din-kat-Geor-Latn-32707-2010.yaml +0 -145
  196. data/maps/din-mar-Deva-Latn-33904-2018.yaml +0 -84
  197. data/maps/din-nep-Deva-Latn-33904-2018.yaml +0 -119
  198. data/maps/din-pli-Deva-Latn-33904-2018.yaml +0 -75
  199. data/maps/din-pra-Deva-Latn-33904-2018.yaml +0 -63
  200. data/maps/din-san-Deva-Latn-33904-2018.yaml +0 -338
  201. data/maps/din-tam-Taml-Latn-33903-2016.yaml +0 -213
  202. data/maps/dos-nep-Deva-Latn-1997.yaml +0 -47
  203. data/maps/elot-ell-Grek-Latn-743-1982-tl.yaml +0 -684
  204. data/maps/elot-ell-Grek-Latn-743-1982-ts.yaml +0 -680
  205. data/maps/elot-ell-Grek-Latn-743-2001-tl.yaml +0 -19
  206. data/maps/elot-ell-Grek-Latn-743-2001-ts.yaml +0 -31
  207. data/maps/ggg-kat-Geor-Latn-2002.yaml +0 -92
  208. data/maps/gki-bel-Cyrl-Latn-1992.yaml +0 -33
  209. data/maps/gki-bel-Cyrl-Latn-2000.yaml +0 -201
  210. data/maps/gost-rus-Cyrl-Latn-16876-71-1983.yaml +0 -190
  211. data/maps/gost-rus-Cyrl-Latn-7.79-2000-2002.yaml +0 -157
  212. data/maps/hk-yue-Hani-Latn-1888.yaml +0 -38497
  213. data/maps/icao-bel-Cyrl-Latn-9303.yaml +0 -136
  214. data/maps/icao-bul-Cyrl-Latn-9303.yaml +0 -118
  215. data/maps/icao-fas-Arab-Latn-9303.yaml +0 -103
  216. data/maps/icao-heb-Hebr-Latn-9303.yaml +0 -151
  217. data/maps/icao-mkd-Cyrl-Latn-9303.yaml +0 -117
  218. data/maps/icao-rus-Cyrl-Latn-9303.yaml +0 -117
  219. data/maps/icao-srp-Cyrl-Latn-9303.yaml +0 -117
  220. data/maps/icao-ukr-Cyrl-Latn-9303.yaml +0 -119
  221. data/maps/iso-ara-Arab-Latn-233-1984.yaml +0 -323
  222. data/maps/iso-asm-Beng-Latn-15919-2001.yaml +0 -75
  223. data/maps/iso-ben-Beng-Latn-15919-2001.yaml +0 -175
  224. data/maps/iso-ell-Grek-Latn-843-1997-t1.yaml +0 -613
  225. data/maps/iso-ell-Grek-Latn-843-1997-t2.yaml +0 -44
  226. data/maps/iso-guj-Gujr-Latn-15919-2001.yaml +0 -220
  227. data/maps/iso-hin-Deva-Latn-15919-2001.yaml +0 -87
  228. data/maps/iso-inc-Deva-Latn-15919-2001.yaml +0 -61
  229. data/maps/iso-jpn-Hrkt-Latn-3602-1989.yaml +0 -66
  230. data/maps/iso-kan-Kana-Latn-15919-2001.yaml +0 -220
  231. data/maps/iso-kat-Geor-Latn-9984-1996.yaml +0 -145
  232. data/maps/iso-kor-Hang-Latn-1996-method1.yaml +0 -240
  233. data/maps/iso-kor-Hang-Latn-1996-method2.yaml +0 -226
  234. data/maps/iso-mal-Mlym-Latn-15919-2001.yaml +0 -281
  235. data/maps/iso-mar-Deva-Latn-15919-2001.yaml +0 -75
  236. data/maps/iso-nep-Deva-Latn-15919-2001.yaml +0 -87
  237. data/maps/iso-ori-Orya-Latn-15919-2001.yaml +0 -193
  238. data/maps/iso-pan-Guru-Latn-15919-2001.yaml +0 -222
  239. data/maps/iso-pli-Beng-Latn-15919-2001.yaml +0 -73
  240. data/maps/iso-pli-Deva-Latn-15919-2001.yaml +0 -74
  241. data/maps/iso-pli-Sinh-Latn-15919-2001.yaml +0 -219
  242. data/maps/iso-pli-Thai-Latn-15919-2001.yaml +0 -55
  243. data/maps/iso-pra-Deva-Latn-15919-2001.yaml +0 -59
  244. data/maps/iso-prs-Arab-Latn-233-3-1999.yaml +0 -366
  245. data/maps/iso-rus-Cyrl-Latn-9-1995.yaml +0 -271
  246. data/maps/iso-san-Deva-Latn-15919-2001.yaml +0 -220
  247. data/maps/iso-tam-Taml-Latn-15919-2001.yaml +0 -159
  248. data/maps/iso-tel-Telu-Latn-15919-2001.yaml +0 -220
  249. data/maps/iso-tha-Thai-Latn-11940-1998.yaml +0 -109
  250. data/maps/kp-kor-Hang-Latn-2002.yaml +0 -909
  251. data/maps/lshk-yue-Hani-Latn-jyutping-1993.yaml +0 -44820
  252. data/maps/masm-mon-Cyrl-Latn-5217-2012.yaml +0 -163
  253. data/maps/masm-mon-Latn-Cyrl-5217-2012.yaml +0 -200
  254. data/maps/mext-jpn-Hrkt-Latn-1954.yaml +0 -411
  255. data/maps/moct-kor-Hang-Latn-2000.yaml +0 -807
  256. data/maps/mofa-jpn-Hrkt-Latn-1989.yaml +0 -541
  257. data/maps/mv-div-Thaa-Latn-1987.yaml +0 -200
  258. data/maps/mvd-bel-Cyrl-Latn-2008.yaml +0 -225
  259. data/maps/mvd-bel-Cyrl-Latn-2010.yaml +0 -63
  260. data/maps/mvd-rus-Cyrl-Latn-2008.yaml +0 -109
  261. data/maps/mvd-rus-Cyrl-Latn-2010.yaml +0 -37
  262. data/maps/odni-ara-Arab-Latn-2004.yaml +0 -137
  263. data/maps/odni-ara-Arab-Latn-2015.yaml +0 -315
  264. data/maps/odni-aze-Cyrl-Latn-2015.yaml +0 -144
  265. data/maps/odni-bel-Cyrl-Latn-2015.yaml +0 -148
  266. data/maps/odni-bul-Cyrl-Latn-2005.yaml +0 -90
  267. data/maps/odni-bul-Cyrl-Latn-2015.yaml +0 -96
  268. data/maps/odni-che-Cyrl-Latn-2015.yaml +0 -169
  269. data/maps/odni-fas-Arab-Latn-2004.yaml +0 -276
  270. data/maps/odni-fas-Arab-Latn-2015.yaml +0 -406
  271. data/maps/odni-hin-Deva-Latn-2004.yaml +0 -182
  272. data/maps/odni-hin-Deva-Latn-2015.yaml +0 -258
  273. data/maps/odni-kat-Geor-Latn-2015.yaml +0 -87
  274. data/maps/odni-kaz-Cyrl-Latn-2015.yaml +0 -148
  275. data/maps/odni-kir-Cyrl-Latn-2015.yaml +0 -136
  276. data/maps/odni-kor-Hang-Latn-2015.yaml +0 -375
  277. data/maps/odni-mkd-Cyrl-Latn-2005.yaml +0 -21
  278. data/maps/odni-mkd-Cyrl-Latn-2015.yaml +0 -122
  279. data/maps/odni-prs-Arab-Latn-2004.yaml +0 -123
  280. data/maps/odni-prs-Arab-Latn-2015.yaml +0 -228
  281. data/maps/odni-rus-Cyrl-Latn-2015.yaml +0 -77
  282. data/maps/odni-srp-Cyrl-Latn-2005.yaml +0 -36
  283. data/maps/odni-srp-Cyrl-Latn-2015.yaml +0 -129
  284. data/maps/odni-tat-Cyrl-Latn-2015.yaml +0 -142
  285. data/maps/odni-tgk-Cyrl-Latn-2015.yaml +0 -148
  286. data/maps/odni-tuk-Cyrl-Latn-2015.yaml +0 -170
  287. data/maps/odni-uig-Cyrl-Latn-2015.yaml +0 -138
  288. data/maps/odni-ukr-Cyrl-Latn-2015.yaml +0 -161
  289. data/maps/odni-urd-Arab-Latn-2015.yaml +0 -221
  290. data/maps/odni-uzb-Cyrl-Latn-2015.yaml +0 -166
  291. data/maps/royin-tha-Thai-Latn-1939-generic.yaml +0 -90
  292. data/maps/royin-tha-Thai-Latn-1968.yaml +0 -183
  293. data/maps/royin-tha-Thai-Latn-1999-chained.yaml +0 -180
  294. data/maps/royin-tha-Thai-Latn-1999.yaml +0 -80
  295. data/maps/sac-zho-Hans-Latn-1979.yaml +0 -24763
  296. data/maps/sasm-mon-Mong-Latn-general-1978.yaml +0 -389
  297. data/maps/sasm-mon-Mong-Latn-phonetic-1978.yaml +0 -354
  298. data/maps/ses-ara-Arab-Latn-1930.yaml +0 -283
  299. data/maps/stategeocadastre-ukr-Cyrl-Latn-1993.yaml +0 -222
  300. data/maps/ua-ukr-Cyrl-Latn-1996.yaml +0 -197
  301. data/maps/ua-ukr-Cyrl-Latn-2007.yaml +0 -75
  302. data/maps/ua-ukr-Cyrl-Latn-2010.yaml +0 -192
  303. data/maps/un-amh-Ethi-Latn-2016.yaml +0 -602
  304. data/maps/un-ara-Arab-Latn-1971.yaml +0 -139
  305. data/maps/un-ara-Arab-Latn-1972.yaml +0 -159
  306. data/maps/un-ara-Arab-Latn-2017.yaml +0 -420
  307. data/maps/un-asm-Beng-Latn-1972.yaml +0 -223
  308. data/maps/un-bel-Cyrl-Latn-2007.yaml +0 -114
  309. data/maps/un-ben-Beng-Latn-2016.yaml +0 -534
  310. data/maps/un-ell-Grek-Latn-1987-phonetic.yaml +0 -780
  311. data/maps/un-ell-Grek-Latn-1987-tl.yaml +0 -31
  312. data/maps/un-ell-Grek-Latn-1987-ts.yaml +0 -19
  313. data/maps/un-guj-Gujr-Latn-1972.yaml +0 -229
  314. data/maps/un-hin-Deva-Latn-2016.yaml +0 -316
  315. data/maps/un-kan-Kana-Latn-2016.yaml +0 -254
  316. data/maps/un-mal-Mlym-Latn-1972.yaml +0 -251
  317. data/maps/un-mar-Deva-Latn-2016.yaml +0 -102
  318. data/maps/un-mon-Mong-Latn-general-2013.yaml +0 -264
  319. data/maps/un-mon-Mong-Latn-phonetic-2013.yaml +0 -264
  320. data/maps/un-nep-Deva-Latn-1972.yaml +0 -269
  321. data/maps/un-nep-Deva-Latn-2013.yaml +0 -74
  322. data/maps/un-ori-Orya-Latn-1972.yaml +0 -247
  323. data/maps/un-pan-Guru-Latn-1972.yaml +0 -402
  324. data/maps/un-prs-Arab-Latn-1967.yaml +0 -236
  325. data/maps/un-rus-Cyrl-Latn-1987.yaml +0 -166
  326. data/maps/un-tam-Taml-Latn-1972.yaml +0 -194
  327. data/maps/un-tel-Telu-Latn-1972.yaml +0 -270
  328. data/maps/un-ukr-Cyrl-Latn-1998.yaml +0 -53
  329. data/maps/un-ukr-Cyrl-Latn-2012.yaml +0 -162
  330. data/maps/un-urd-Arab-Latn-1972.yaml +0 -405
  331. data/maps/var-amh-Ethi-Latn-eae-2003.yaml +0 -466
  332. data/maps/var-gez-Ethi-Latn-eae-2003.yaml +0 -76
  333. data/maps/var-hin-Deva-Latn-hunterian-1872.yaml +0 -221
  334. data/maps/var-jpn-Hrkt-Latn-hepburn-1886.yaml +0 -406
  335. data/maps/var-jpn-Hrkt-Latn-hepburn-1954.yaml +0 -386
  336. data/maps/var-kor-Hang-Hang-jamo.yaml +0 -11193
  337. data/maps/var-kor-Hang-Latn-mr-1939.yaml +0 -1054
  338. data/maps/var-kor-Kore-Hang-2013.yaml +0 -59754
  339. data/maps/var-kor-Kore-Latn-mr-1939.yaml +0 -36
  340. data/maps/var-mar-Deva-Latn-hunterian-1872.yaml +0 -43
  341. data/maps/var-mon-Mong-Latn-1930.yaml +0 -102
  342. data/maps/var-mon-Mong-Latn-lessing.yaml +0 -272
  343. data/maps/var-mon-Mong-Latn-vpmc.yaml +0 -274
  344. data/maps/var-pra-Deva-Latn-iast-1912.yaml +0 -30
  345. data/maps/var-san-Deva-Latn-iast-1912.yaml +0 -149
  346. data/maps/var-tha-Thai-Thai-phonemic.yaml +0 -59
  347. data/maps/var-tha-Thai-Zsym-ipa.yaml +0 -301
  348. data/maps/var-zho-Hani-Latn-wd-1979.yaml +0 -38912
  349. data/spec/interscript/filenames_spec.rb +0 -21
  350. data/spec/interscript/mapping_spec.rb +0 -42
  351. data/spec/interscript_spec.rb +0 -37
  352. data/spec/spec_helper.rb +0 -3
@@ -1,598 +0,0 @@
1
- ---
2
- authority_id: bgnpcgn
3
- id: 1956
4
- language: iso-639-2:ara
5
- source_script: Arab
6
- destination_script: Latn
7
- name: ROMANIZATION OF ARABIC -- BGN/PCGN 1956 System
8
- alias:
9
- ogc11122:
10
- code: ara_Arab2Latn_BGN_1956
11
- description: Arabic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1956 System
12
- url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/858000/ROMANIZATION_OF_ARABIC.pdf
13
- creation_date: 1956
14
- confirmation date: 2019-12
15
- description: |
16
-
17
- This System was adopted by the BGN in 1946 and by the PCGN in 1956 and is applied by BGN and PCGN in the systematic romanization of Arabic geographical names in Bahrain, Egypt, Iraq, Jordan, Kuwait, Libya, Oman, Qatar, Saudi Arabia, Syria, the United Arab Emirates, Yemen, the West Bank and Gaza Strip.
18
-
19
- Uniform results in the romanization of Arabic are difficult to obtain, since vowel points and diacritical marks are generally omitted from both handwriting and printed script. It follows that for correct identification of the words which appear in any particular name, knowledge of its standard Arabic-script spelling including proper pointing, and recognition of dialectal and idiosyncratic deviations are essential.
20
-
21
- In order to bring about uniformity in the Roman-script spelling of geographical names in Arabic-language areas, the system is based insofar as possible on fully pointed Modern Standard Arabic (MSA). In the interest of clarity, vowel pointing to indicate short vowels has been applied to the examples given below, and examples of the more usual unpointed script have also been provided; it should also be noted that the dots which occur on some characters of the Arabic script are not vowels but rather are an integral part of the base consonant.
22
-
23
- Arabic script is written from right to left, and does not make a distinction between upper and lower case.
24
-
25
-
26
-
27
- notes: |
28
-
29
- - (NOTE 1) The symbol ◌ is used in this system to symbolise any Arabic consonant character. It is not itself an Arabic letter.
30
-
31
- - (NOTE 2) Hamzah (ء) is written in Arabic in association with most instances of initial alif, except those which belong to the definite article al or which bear a maddah (see note 9). Hamzah is written above the alif ( أَ) if the accompanying short vowel is a fatḩah or ḑammah and usually below the alif( أ ) if the accompanying short vowel is a kasrah.
32
- When the purpose is to indicate the presence of a glottal stop, hamzah is written over medial alif ( أ ), wāw (ؤ) and yā’, typically without dots (ئ); or following final alif ( أ ء ), these characters serving only to “bear” the hamzah. Hamzah following kasrah ( ) is written (ئ); the yā’ is usually in the initial or medial form and the dots are omitted e.g. bi’r ( بئ ر ).
33
- Hamzah following ḑammah ( ) is written (ؤ). Hamzah following a long vowel is written without a bearer and is positioned on the line of print like a regular character, e.g. صنعاء Şan‘ā’. The romanization of hamzah (’ - Unicode encoding 2019) should always be carefully distinguished from that of ‘ayn (‘ - Unicode encoding 2018).
34
-
35
- - (NOTE 3) Alif (ا) occurs with the following uses
36
- a. Initially, it indicates that the word begins with a vowel or diphthong; the alif itself is not romanized, but rather “carries” the short vowel, which is romanized; e.g., ظب ي أبو → Abū Z̧aby.
37
-
38
- b. With maddah (آ – row 18 in the vowel table), it is represented ā; e.g., مُ عيط آلب و → Ālbū Mu‘ayţ. See also note 9.
39
-
40
- c. Medially and finally it is represented ā; e.g., ب ا ب → Bāb, صيدا → Şaydā.
41
-
42
- d. Medially and finally, alif may serve as the bearer of hamzah, e.g. رأس → ra’s. See also note 2.
43
-
44
- - (NOTE 4) The tā’ marbūţah character (ة), which looks like hā’ with two dots above and occurs only at the end of words, is romanized h, except in an iḑāfah noun phrase construction, where it is romanized t, in accordance with pronunciation. e.g. Muḩāfaz̧ah (as an isolated word) but Muḩāfaz̧at Baghdād. In exceptional cases, when it is necessary to distinguish it from the tā’ marbūţah, the ending fatḩah + hā’ ( ه ) may be romanized a·h when the character hā’ (ه) is pronounced as such. Example Muntaza·h. (see also special rule 13). The tā marbūţah is always preceded by the short vowel fatḩah ( ) and is therefore romanized as ah or at, except when it is preceded by alif when it is romanized āh (not āah), e.g. Ḩamāh (حماة ), and as āt within an iḑāfah construction.
45
-
46
- - (NOTE 5) The character yā’ (in final form but without dots) preceded by the vowel point fatḩah is known as alif maqşūrah. This character may also be pointed ى and should be romanized á. See character 7 in the vowel table.
47
-
48
- - (NOTE 6) The classical Arabic grammatical endings written with the nunation symbols (tanwīn) may be romanized, when necessary, by an, in, un. In modern spoken Arabic, these endings have become silent and should not be romanized e.g. classical alifun; modern alif.
49
-
50
- - (NOTE 7) Doubled consonant sounds are represented in Arabic script by placing a shaddah ( ) over a consonant character, although like the short vowels the shaddah may not always be written. In romanization the letter should be doubled, e.g. Quwwah, ‘Abbās. However, the combination of the consonant character yā’ with a shaddah preceded by a kasrah ( ي ) at the end of a word is romanized ī, e.g. Gharbī; a word ending kasrah + yā’ with a shaddah + tā’ marbūţah is romanized īyah (rather than iyyah), e.g. ال س ل يمانِ ية
51
- is romanized As Sulaymānīyah and not As Sulaymāniyyah; and when the kasrah + yā’ + shaddah combination is followed by the sound masculine plural ending ( يين or يون ) it should be romanized as –īyīn/īyūn, e.g. ساحة العباسيين should be romanized as Sāḩat al ‘Abbāsīyīn.
52
-
53
- - (NOTE 8) Hamzat al waşl (ٱ), which is utilized only in the pointing of classical Arabic, is romanized ’ as illustrated in the classical form of its name hamzatu’l waşli.
54
-
55
- - (NOTE 9) Since maddah ( أ ), which is placed over alif ( أ ), often occurs in word-initial position, no confusion results from the use of ā for alif maddah ( أ ) as well as for fatḩah followed by alif ( اَ ).
56
-
57
- - (NOTE 10) The ligature ل ا represents lām-alif, and should be romanized lā.
58
-
59
- - (NOTE 11) In word initial position the combination Alif +Wāw (او ) is sometimes used to render an initial long vowel sound in words of non-Arabic origin. Where this is clearly the case it should be romanized Ū. In words of Arabic or uncertain origin it should be romanized Aw. In word-medial or word-final position it should always be romanized āw. Similarly the combination Alif +Yā’ (اي ) is romanized Ī to render an initial long vowel sound but as āy in word-medial or word-final position.
60
-
61
- # SPECIAL RULES
62
-
63
- - The Arabic definite article al (ال ) should be treated as follows |
64
- a. Initial definite articles should be capitalized and hyphens should not be used to connect parts of names, e.g. Ash Shāriqah. When appearing medially in a name the initial ‘a’ should be lower case, e.g. Tall al Laḩm.
65
-
66
- b. When the definite article precedes a word beginning with one of the “sun letters” t, th, d, dh, r, z, s, sh, ş, ḑ, ţ, z̧, l, or n – the l is assimilated in pronunciation and romanization, thus yielding, for example, the romanization Ar Riyāḑ, rather than Al Riyāḑ for ال ريا ض .
67
-
68
- c. If sources contradict over the inclusion or non-inclusion of the definite article in a name, preference should be given to the form with the article.
69
-
70
- - Conjunctions and prepositions should be romanized according to their written form in Arabic script and should be lower case. In cases where the conjunction or preposition ends in a long or short vowel any assimilated pronunciation should not be shown in the romanized form. e.g. Khabb wa ash Sha‘f (خب والشعف ). |
71
-
72
- There are two exceptions to this rule
73
-
74
- a. In the case of the preposition li (ل), where the alif of the definite article is assimilated in the written form as well as pronunciation, the written form should be shown in romanization as follows Mişr liţ Ţayarān (مصر للطيران ); Ash Sharikah al ‘Āmmah lil Maghāzil (الشركة العامة للمغازل ).
75
-
76
- b. In the case of the preposition bi (ب), the alif of the definite article is assimilated in pronunciation and, although the alif remains in the written form the short vowel it carries changes from ‘a’ to ‘i’. For example Al Qaryah bid Duwayr (القرية بالدوير ) but Ad Duwayr (الدوير ); and Al Ḩarajah bil Qur’ān (الحرجة بالقرآن ) but Al Qur’ān (القرآن ).
77
-
78
-
79
- - The Arabic word for God ( لله) should be written Allāh. The alif khanjarīyah (dagger alif) ( ) above the second ل (lām) in the word لله , like the short vowels, is not usually written but should be romanized ā, like a full-size alif. This diacritical mark appears in a few other Arabic words, for instance on the alif maqşūrah as described in note 5.
80
-
81
- - Names which consist of noun phrases (see also note 4) should be written as separate words. The definite article within such names should be romanized al, not ul, e.g., ‘Abd Allāh, ‘Abd ar Raḩmān, Dhū al Faqār, and as noted in special rule 1, the medial al should be lower case.
82
-
83
- - The Arabic word بن should be romanized Bin rather than Ibn whenever written without alif, that is between two proper nouns, e.g., ‘Umar Bin al Khaţţāb. Where it appears with alif (ابن), it should be romanized Ibn.
84
-
85
- - The Turkish word Paşa should be romanized from Arabic script as Bāshā. The Turkish word Bey should be romanized as Bey in Egyptian names, no matter how it is written in Arabic-language sources, but in other Arabic areas it should be romanized as Bak where written بك and as Bayk when written بيك .
86
-
87
- - The modern colloquial word Sīdī (سيدي ) should be given precedence over the classical form Sayyidī. This does not preclude the spelling Sayyidī if the latter is indicated by the Arabic script or other evidence – for instance, if the yā’ is written with a shaddah ( ).
88
-
89
- - The colloquial word Bū should not be changed to the standard form Abū.
90
-
91
- - The colloquial word for water, written مي ة on Arabic maps, should be romanized Mayyat.
92
-
93
- - Place names of Aramaic origin in Syria often contain initial consonant clusters consisting of b plus another consonant such as l or h. In romanization, the clusters bl, bh, etc., should be so represented.
94
-
95
- - In names containing the Arabic word for back, ridge, or hill, appearing as either ظهر (Z̧ahr) or ضه ر (Ḑahr) in Arabic sources, the word should be romanized to reflect the particular Arabic spelling shown. Where sources differ, preference should be given to the form found on the most authoritative source.
96
-
97
- - In formal Arabic, the spelling of some words ending in a long vowel character may change according to that word’s grammatical function in a sentence. For example, the personal name Abū Bakr (ابو بكر ) would become Abī Bakr (ابي بكر ) when preceded by a generic in an iḑāfah construction (used in Moroccan Arabic Script) e.g. Shāri‘ Abī Bakr (شارع ابي بكر – Abu Bakr Street). The spelling of such words as found on the most authoritative source should be used in the romanized form of the name. Other common words affected by this rule are Banū/Banī (sons of…) and Dhū/Dhī (owner of ...). Examples of names in this category include Jabal Abā aş Şabbān (جبل ابا الصبان ) and Muḩāfaz̧at Dhī Qār ( محافظة ذي قار ).
98
-
99
- - Occasionally the character sequences ك ه , ده , س ه , and ت occur. They may be romanized k·h, d·h, s·h, and t·h in order to differentiate these romanizations from the digraphs kh, dh, sh, and th, which are used to represent the characters خ, ذ, ش, and ث respectively. See also note 4.
100
-
101
-
102
- tests:
103
-
104
- - source: قُرآن
105
- expected: Qur’ān
106
-
107
- - source: أَبُو ظَبْي
108
- expected: Abū Z̧aby
109
-
110
- - source: بِئْر زَيْت
111
- expected: Bi’r Zayt
112
-
113
- - source: أُمّ العَمَد
114
- expected: Umm al ‘Amad
115
-
116
- - source: البَحرَيْن
117
- expected: Al Baḩrayn
118
-
119
- - source: الكُوت
120
- expected: Al Kūt
121
-
122
- - source: الثُّلَيْثُوَات
123
- expected: Ath Thulaythuwāt
124
-
125
- - source: الجَزِيرَة
126
- expected: Al Jazīrah
127
-
128
- - source: المَحْمُودِيَّة
129
- expected: Al Maḩmūdīyah
130
-
131
- - source: خَيْبَر
132
- expected: Khaybar
133
-
134
- - source: دَمَنْهُور
135
- expected: Damanhūr
136
-
137
- - source: ذَهَب
138
- expected: Dhahab
139
-
140
- - source: الرَّوْضة
141
- expected: Ar Rawḑah
142
-
143
- - source: زُوَارَة
144
- expected: Zuwārah
145
-
146
- - source: السُّلَيْمانِيَّة
147
- expected: As Sulaymānīyah
148
-
149
- - source: الشَّام
150
- expected: Ash Shām
151
-
152
- - source: قَيْصُومَة
153
- expected: Qayşūmah
154
-
155
- - source: ضَوْر
156
- expected: Ḑawr
157
-
158
- - source: القُنَيْطِرَة
159
- expected: Al Qunayţirah
160
-
161
- - source: ظُفَار
162
- expected: Z̧ufār
163
-
164
- - source: أَبُو عَرِيش
165
- expected: Abū ‘Arīsh
166
-
167
- - source: بَغْداد
168
- expected: Baghdād
169
-
170
- - source: الفُرات
171
- expected: Al Furāt
172
-
173
- - source: قَطَر
174
- expected: Qaţar
175
-
176
- - source: الكُوَيْت
177
- expected: Al Kuwayt
178
-
179
- - source: حَلَب
180
- expected: Ḩalab
181
-
182
- - source: مَكَّة
183
- expected: Makkah
184
-
185
- - source: نَخْل
186
- expected: Nakhl
187
-
188
- - source: جَبَل هارُون
189
- expected: Jabal Hārūn
190
-
191
- - source: وادِي غَضَا
192
- expected: Wādī Ghaḑā
193
-
194
- - source: اليَمَن
195
- expected: Al Yaman
196
-
197
- - source: القاهِرَة
198
- expected: Al Qāhirah
199
-
200
- - source: المَدِينَة المُنَوَّرَة
201
- expected: Al Madīnah al Munawwarah
202
-
203
- - source: مُحَافَظَة دِمَشْق
204
- expected: Muḩāfaz̧at Dimashq
205
-
206
- - source: البَصْرَة
207
- expected: Al Başrah
208
-
209
- - source: الرِّيَاض
210
- expected: Ar Riyāḑ
211
-
212
- - source: القُدْس
213
- expected: Al Quds
214
-
215
- - source: بَاب المَنْدَب
216
- expected: Bāb al Mandab
217
-
218
- - source: المَدِينة
219
- expected: Al Madīnah
220
-
221
- - source: صُور
222
- expected: Şūr
223
-
224
- - source: مَرْسَىٰ مَطْرُوح
225
- expected: Marsá Maţrūḩ
226
-
227
- - source: صَيْدَا
228
- expected: Şaydā
229
-
230
- - source: الدَّوحَة
231
- expected: Ad Dawḩah
232
-
233
- - source: مُحَمَّد
234
- expected: Muḩammad
235
-
236
- - source: أُوزُونْلَار
237
- expected: Ūzūnlār
238
-
239
- - source: أَوْسَط
240
- expected: Awsaţ
241
-
242
- - source: سَنَاو
243
- expected: Sanāw
244
-
245
- - source: اِيرَان
246
- expected: Īrān
247
-
248
- - source: تَلّ السَّرَاي
249
- expected: Tall as Sarāy
250
-
251
- - source: آلْبُو مُعَيْط
252
- expected: Ālbū Mu‘ayţ
253
-
254
- - source: سَلْمان پَاك
255
- expected: Salmān Pāk
256
-
257
- - source: تَلّ كُوچِك الصَّغِير
258
- expected: Tall Kūchik aş Şaghīr
259
-
260
- # - source: مَزََّة ڤِيلَّات غَرْبِيَّة
261
- # expected: Mazzah Vīllāt Gharbīyah
262
-
263
- - source: ڨَفْصَة
264
- expected: Gafşah
265
-
266
- - source: تَلّ گَمْر
267
- expected: Tall Gamr
268
-
269
- - source: زَاڴُورَة
270
- expected: Zāgūrah
271
-
272
- - source: اِيران
273
- expected: Īrān
274
-
275
- map:
276
- postrules:
277
- - pattern: '(?<=\b)(?<!\b[‘|’])[\u0061-\uFFFF]'
278
- result: "upcase"
279
- # don't capitalize the definite article in the middle of a sentence
280
- - pattern : ' At T' # الت
281
- result: ' at T'
282
- - pattern : ' Ath Th' # الث
283
- result: ' ath th'
284
- - pattern : ' Ad D' # الد
285
- result: ' ad D'
286
- - pattern : ' Adh Dh' # الذ
287
- result: ' adh Dh'
288
- - pattern : ' Ar R' # الر
289
- result: ' ar R'
290
- - pattern : ' Az Z' # الز
291
- result: ' az Z'
292
- - pattern : ' As S' # الس
293
- result: ' as S'
294
- - pattern : ' Ash Sh' # الش
295
- result: ' ash Sh'
296
- - pattern : ' Aş Ş' # الص
297
- result: ' aş Ş'
298
- - pattern : ' Aḑ Ḑ' # الض
299
- result: ' aḑ Ḑ'
300
- - pattern : ' Aţ Ţ' # الط
301
- result: ' aţ Ţ'
302
- - pattern : ' Az̧ Z̧' # الظ
303
- result: ' az̧ Z̧'
304
- - pattern : ' Al L' # الل
305
- result: ' al L'
306
- - pattern : ' An N' # الن
307
- result: ' an N'
308
- - pattern: " Al " # ال
309
- result: " al "
310
-
311
- characters:
312
-
313
- # Modified/Non-Standard Arabic Script Characters
314
-
315
- '\u067E': 'p'
316
- '\u0686': 'ch'
317
- '\u06A4': 'v'
318
- # Used in Tunisian Arabic Script.
319
- '\u06A8': 'g'
320
- # Used principally in Iraq, but also sometimes used in other Arabic speaking countries to represent the ‘g’ sound.
321
- '\u06AF': 'g'
322
- # Used in Moroccan Arabic Script.
323
- '\u06B4': 'g' # ڴ
324
- '\u06AD': 'g'
325
-
326
-
327
-
328
- # pointing
329
-
330
-
331
- # Note 11
332
- '\b\u0627\u0648': 'ū' #او
333
- '\b\u0627\u0648\u0652' : 'aw' # اوْ
334
- '\u0627\u0648': 'āw' #او in word medial or final position
335
-
336
- '\b\u0627\u064A': 'ī' # اي in word initial position (see Note 11)
337
- '\u0627\u064A' : 'āy' # اي in word medial or final position
338
-
339
-
340
- '\u064e' : 'a' # َ fatha
341
- '\u064e(?=\u0629)' : '' # َ fatha followed by ta' marboota
342
- '\u064e(?=a[h|t])' : '' # َ fatha followed by ta' marboota, handling different order of conversion
343
- '\u0650' : 'i' # ِ kasra
344
- '\u064f' : 'u' # ُ damma
345
-
346
- '\u064e\u0627' : 'ā' # ـَا fatha followed by ا
347
- '\u0650\u064a' : 'ī' # ـِي kasra followed by ي
348
- '\u064f\u0648' : 'ū' # ـُو damma followed by و
349
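- '\u064f\u0648(?=\u064e|u064f)' : 'uw' # ـُو damma followed by و when followed by fatha (NOTE: the second lookahead alternative is written u064f without a backslash, so it matches that literal text rather than damma)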
350
- '\u064e\u0649' : 'á' # ـَى fatha followed by ى which is ا not ي Note 5
351
- '\u064e\u0649\u0670' : 'á' # Note 5
352
- '\u0649\u0670': 'á' # See Note 5
353
- '\u0652' : '' # ْ sokoon, not romanized, Indicates absence of short vowel
354
- '\u064e\u064a\u0652' : 'ay' # ـَيْ
355
- '\u064e\u064a' : 'aī' # ـَي
356
-
357
- '\u064e\u0648\u0652' : 'aw' # ـَوْ
358
- '\u064b': '' # See Note 6
359
- '\u064d': '' # See Note 6
360
- '\u064c': '' # See Note 6
361
-
362
-
363
- # special pointed letters
364
- '\u0639\u064e' : '‘a' # عَ
365
- '\u0639\u0650' : '‘i' # عِ
366
- '\u0639\u064f' : '‘ū' # عُ
367
-
368
- # Note 2
369
- '\u0623' : ''
370
- # '\u0623\u064e' : 'a' # أَ
371
- # '\u0625\u0650' : 'i' # إِ
372
- # '\u0623\u064f' : 'u' # أُ
373
- # '\u0623\u064f\u0648' : 'ū' # أُ
374
-
375
- # handle MacOS regex difference
376
- '\u0639\u064f\u0648' : '‘ū' # عُو damma followed by و
377
-
378
- '\u0650\u064a\u0651\u064e' : 'īy' # ـِيَّ
379
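- '\u0650\u064a(?=\u064e|u064f)' : 'iy' # ـِي kasra followed by ي when followed by fatha (NOTE: the second lookahead alternative is written u064f without a backslash, so it matches that literal text rather than damma)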
380
-
381
- # not romanized in word-initial position (see Note 2)
382
- '\u0621': '’'
383
-
384
- '\b\u0622' : 'ā' # آ in word initial position (see Notes 3 and 9)
385
- '\u0622': '’ā' # آ in word medial position (see Notes 3 and 9)
386
- '\u0671': '’' # See Note 8
387
- '\u0626' : "’" # ئ
388
- '\u0627': 'ā' # See Notes 3 and 10
389
-
390
- '\b\u0627\u0648' : 'ū' # او in word initial position
391
- '\b\u0627\u0650\u064a' : 'ī' # اي
392
- '\b\u0627\u0644' : 'al ' # ال
393
-
394
- # Sun letters
395
-
396
- '\b\u0627\u0644\u062a\u0651?' : 'at t' # الت
397
- '\b\u0627\u0644\u062b\u0651?' : 'ath th' # الث
398
- '\b\u0627\u0644\u062f\u0651?' : 'ad d' # الد
399
- '\b\u0627\u0644\u0630\u0651?' : 'adh dh' # الذ
400
- '\b\u0627\u0644\u0631\u0651?' : 'ar r' # الر
401
- '\b\u0627\u0644\u0632\u0651?' : 'az z' # الز
402
- '\b\u0627\u0644\u0633\u0651?' : 'as s' # الس
403
- '\b\u0627\u0644\u0634\u0651?' : 'ash sh' # الش
404
- '\b\u0627\u0644\u0635\u0651?' : 'aş ş' # الص
405
- '\b\u0627\u0644\u0636\u0651?' : 'aḑ ḑ' # الض
406
- '\b\u0627\u0644\u0637\u0651?' : 'aţ ţ' # الط
407
- '\b\u0627\u0644\u0638\u0651?' : 'az̧ z̧' # الظ
408
- '\b\u0627\u0644\u0644\u0651?' : 'al l' # الل
409
- '\b\u0627\u0644\u0646\u0651?' : 'an n' # الن
410
-
411
- # shadda Note 7
412
-
413
- '\u0628\u0651' : 'bb' # ب
414
- '\u062a\u0651' : 'tt' # ت
415
- '\u062b\u0651' : 'thth' # ث
416
- '\u062c\u0651' : 'jj' # ج
417
- '\u062d\u0651' : 'ḩḩ' # ح
418
- '\u062e\u0651' : 'khkh' # خ
419
- '\u062f\u0651' : 'dd' # د
420
- '\u0630\u0651' : 'dhdh' # ذ
421
- '\u0631\u0651' : 'rr' # ر
422
- '\u0632\u0651' : 'zz' # ز
423
- '\u0633\u0651' : 'ss' # س
424
- '\u0634\u0651' : 'sh' # ش
425
- '\u0635\u0651' : 'şş' # ص
426
- '\u0636\u0651' : 'ḑḑ' # ض
427
- '\u0637\u0651' : 'ţţ' # ط
428
- '\u0638\u0651' : 'z̧z̧' # ظ
429
- '\u063a\u0651' : 'ghgh' # غ
430
- '\u0641\u0651' : 'ff' # ف
431
- '\u0642\u0651' : 'qq' # ق
432
- '\u0643\u0651' : 'kk' # ك
433
- '\u0644\u0651' : 'll' # ل
434
- '\u0645\u0651' : 'mm' # م
435
- '\u0646\u0651' : 'nn' # ن
436
- '\u0647\u0651' : 'hh' # ه
437
- '\u0648\u0651' : 'ww' # و
438
- '\u064a\u0651' : 'yy' # ي
439
-
440
- # ta' marboota See Note 4
441
-
442
- '\u0629' : 'at' # ة in the middle of the sentence
443
- '\u0629$' : 'ah'
444
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{2})\u0629' : 'ah'
445
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{3})\u0629' : 'ah'
446
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{4})\u0629' : 'ah'
447
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{5})\u0629' : 'ah'
448
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{6})\u0629' : 'ah'
449
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{7})\u0629' : 'ah'
450
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{8})\u0629' : 'ah'
451
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{9})\u0629' : 'ah'
452
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{10})\u0629' : 'ah'
453
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{11})\u0629' : 'ah'
454
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{12})\u0629' : 'ah'
455
- '(?<=\b\u0627\u0644[\u0600-\u06ff]{13})\u0629' : 'ah'
456
-
457
-
458
- # standard consonant characters
459
-
460
- '\u0628' : 'b' # ب
461
- '\uFE91' : 'b' # ﺑ
462
- '\uFE92' : 'b' # ﺒ
463
- '\uFE90' : 'b' # ﺐ
464
-
465
- '\u062a' : 't' # ت
466
- '\ufe97' : 't' # ﺗ
467
- '\ufe98' : 't' # ﺘ
468
- '\ufe96' : 't' # ﺖ
469
-
470
- '\u062b' : 'th' # ث
471
- '\ufe9b' : 'th' # ﺛ
472
- '\ufe9c' : 'th' # ﺜ
473
- '\ufe9a' : 'th' # ﺚ
474
-
475
- '\u062c' : 'j' # ج
476
- '\ufe9f' : 'j' # ﺟ
477
- '\ufea0' : 'j' # ﺠ
478
- '\ufe9e' : 'j' # ﺞ
479
-
480
- '\u062d' : 'ḩ' # ح
481
- '\ufea3' : 'ḩ' # ﺣ
482
- '\ufea4' : 'ḩ' # ﺤ
483
- '\ufea2' : 'ḩ' # ﺢ
484
-
485
- '\u062e' : 'kh' # خ
486
- '\ufea7' : 'kh' # ﺧ
487
- '\ufea8' : 'kh' # ﺨ
488
- '\ufea6' : 'kh' # ﺦ
489
-
490
- '\u062f' : 'd' # د
491
- '\ufeaa' : 'd' # ﺪ
492
-
493
- '\u0630' : 'dh' # ذ
494
- '\ufeac' : 'dh' # ﺬ
495
-
496
- '\u0631' : 'r' # ر
497
- '\ufeae' : 'r' # ﺮ
498
-
499
- '\u0632' : 'z' # ز
500
- '\ufeb0' : 'z' # ﺰ
501
-
502
- '\u0633' : 's' # س
503
- '\ufeb3' : 's' # ﺳ
504
- '\ufeb4' : 's' # ﺴ
505
- '\ufeb2' : 's' # ﺲ
506
-
507
- '\u0634' : 'sh' # ش
508
- '\ufeb7' : 'sh' # ﺷ
509
- '\ufeb8' : 'sh' # ﺸ
510
- '\ufeb6' : 'sh' # ﺶ
511
-
512
- '\u0635' : 'ş' # ص
513
- '\ufebb' : 'ş' # ﺻ
514
- '\ufebc' : 'ş' # ﺼ
515
- '\ufeba' : 'ş' # ﺺ
516
-
517
- '\u0636' : 'ḑ' # ض
518
- '\ufebf' : 'ḑ' # ﺿ
519
- '\ufec0' : 'ḑ' # ﻀ
520
- '\ufebe' : 'ḑ' # ﺾ
521
-
522
- '\u0637' : 'ţ' # ط
523
- '\ufec3' : 'ţ' # ﻃ
524
- '\ufec4' : 'ţ' # ﻄ
525
- '\ufec2' : 'ţ' # ﻂ
526
-
527
- '\u0638' : 'z̧' # ظ
528
- '\ufec7' : 'z̧' # ﻇ
529
- '\ufec8' : 'z̧' # ﻈ
530
- '\ufec6' : 'z̧' # ﻆ
531
-
532
- '\u0639' : '‘' # ع
533
- '\ufecb' : '‘' # ﻋ
534
- '\ufecc' : '‘' # ﻌ
535
- '\ufeca' : '‘' # ﻊ
536
-
537
- '\u063a' : 'gh' # غ
538
- '\ufecf' : 'gh' # ﻏ
539
- '\ufed0' : 'gh' # ﻐ
540
- '\ufece' : 'gh' # ﻎ
541
-
542
- '\u0641' : 'f' # ف
543
- '\ufed3' : 'f' # ﻓ
544
- '\ufed4' : 'f' # ﻔ
545
- '\ufed2' : 'f' # ﻒ
546
-
547
- '\u0642' : 'q' # ق
548
- '\ufed7' : 'q' # ﻗ
549
- '\ufed8' : 'q' # ﻘ
550
- '\ufed6' : 'q' # ﻖ
551
-
552
- '\u0643' : 'k' # ك
553
- '\ufedb' : 'k' # ﻛ
554
- '\ufedc' : 'k' # ﻜ
555
- '\ufeda' : 'k' # ﻚ
556
-
557
- '\u0644' : 'l' # ل
558
- '\ufedf' : 'l' # ﻟ
559
- '\ufee0' : 'l' # ﻠ
560
- '\ufede' : 'l' # ﻞ
561
-
562
- '\u0645' : 'm' # م
563
- '\ufee3' : 'm' # ﻣ
564
- '\ufee4' : 'm' # ﻤ
565
- '\ufee2' : 'm' # ﻢ
566
-
567
- '\u0646' : 'n' # ن
568
- '\ufee7' : 'n' # ﻧ
569
- '\ufee8' : 'n' # ﻨ
570
- '\ufee6' : 'n' # ﻦ
571
-
572
- '\u0647' : 'h' # ه
573
- '\ufeeb' : 'h' # ﻫ
574
- '\ufeec' : 'h' # ﻬ
575
- '\ufeea' : 'h' # ﻪ
576
-
577
- '\u0648' : 'w' # و
578
- '\ufeee' : 'w' # ﻮ
579
-
580
- '\u064a' : 'y' # ي
581
- '\ufef3' : 'y' # ﻳ
582
- '\ufef4' : 'y' # ﻴ
583
- '\ufef1' : 'y' # ﻱ
584
-
585
-
586
- # NUMERALS
587
-
588
- # Although Perso-Arabic script is written from right to left, numerical expressions, e.g. ۱۹٦۸ → 1968, are written from left to right.
589
- '۰': '0'
590
- '۱': '1'
591
- '۲': '2'
592
- '۳': '3'
593
- '٤': '4'
594
- '٥': '5'
595
- '٦': '6'
596
- '۷': '7'
597
- '۸': '8'
598
- '۹': '9'